CombinerHelper.cpp (LLVM 19.0.0git)
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/InstrTypes.h"
40#include <cmath>
41#include <optional>
42#include <tuple>
43
44#define DEBUG_TYPE "gi-combiner"
45
46using namespace llvm;
47using namespace MIPatternMatch;
48
49// Option to allow testing of the combiner while no targets know about indexed
50// addressing.
51static cl::opt<bool>
52 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
53 cl::desc("Force all indexed operations to be "
54 "legal for the GlobalISel combiner"));
55
56CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
57 MachineIRBuilder &B, bool IsPreLegalize,
58 GISelKnownBits *KB, MachineDominatorTree *MDT,
59 const LegalizerInfo *LI)
60 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
61 MDT(MDT), IsPreLegalize(IsPreLegalize), LI(LI),
62 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
63 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
64 (void)this->KB;
65}
66
67const TargetLowering &CombinerHelper::getTargetLowering() const {
68 return *Builder.getMF().getSubtarget().getTargetLowering();
69}
70
71/// \returns The little endian in-memory byte position of byte \p I in a
72/// \p ByteWidth bytes wide type.
73///
74/// E.g. Given a 4-byte type x, x[0] -> byte 0
75static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
76 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
77 return I;
78}
79
80/// Determines the LogBase2 value for a non-null input value using the
81/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
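/// For illustration: with a 32-bit value V = 8, ctlz(8) = 28, so
/// LogBase2(8) = (32 - 1) - 28 = 3.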
82static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
83 auto &MRI = *MIB.getMRI();
84 LLT Ty = MRI.getType(V);
85 auto Ctlz = MIB.buildCTLZ(Ty, V);
86 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
87 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
88}
89
90/// \returns The big endian in-memory byte position of byte \p I in a
91/// \p ByteWidth bytes wide type.
92///
93/// E.g. Given a 4-byte type x, x[0] -> byte 3
94static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
95 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
96 return ByteWidth - I - 1;
97}
98
99/// Given a map from byte offsets in memory to indices in a load/store,
100/// determine if that map corresponds to a little or big endian byte pattern.
101///
102/// \param MemOffset2Idx maps memory offsets to address offsets.
103/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
104///
105/// \returns true if the map corresponds to a big endian byte pattern, false if
106/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
107///
108/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
109/// are as follows:
110///
111/// AddrOffset Little endian Big endian
112/// 0 0 3
113/// 1 1 2
114/// 2 2 1
115/// 3 3 0
116static std::optional<bool>
117isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
118 int64_t LowestIdx) {
119 // Need at least two byte positions to decide on endianness.
120 unsigned Width = MemOffset2Idx.size();
121 if (Width < 2)
122 return std::nullopt;
123 bool BigEndian = true, LittleEndian = true;
124 for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
125 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
126 if (MemOffsetAndIdx == MemOffset2Idx.end())
127 return std::nullopt;
128 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
129 assert(Idx >= 0 && "Expected non-negative byte offset?");
130 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
131 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
132 if (!BigEndian && !LittleEndian)
133 return std::nullopt;
134 }
135
136 assert((BigEndian != LittleEndian) &&
137 "Pattern cannot be both big and little endian!");
138 return BigEndian;
139}
140
141bool CombinerHelper::isPreLegalize() const { return IsPreLegalize; }
142
143bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
144 assert(LI && "Must have LegalizerInfo to query isLegal!");
145 return LI->getAction(Query).Action == LegalizeActions::Legal;
146}
147
148bool CombinerHelper::isLegalOrBeforeLegalizer(
149 const LegalityQuery &Query) const {
150 return isPreLegalize() || isLegal(Query);
151}
152
153bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
154 if (!Ty.isVector())
155 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
156 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
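  // For illustration (sketch, register names made up): a <2 x s32> splat of 1
  // would be represented as
  //   %c:_(s32) = G_CONSTANT i32 1
  //   %v:_(<2 x s32>) = G_BUILD_VECTOR %c:_(s32), %c:_(s32)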
157 if (isPreLegalize())
158 return true;
159 LLT EltTy = Ty.getElementType();
160 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
161 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
162}
163
164void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
165 Register ToReg) const {
166 Observer.changingAllUsesOfReg(MRI, FromReg);
167
168 if (MRI.constrainRegAttrs(ToReg, FromReg))
169 MRI.replaceRegWith(FromReg, ToReg);
170 else
171 Builder.buildCopy(ToReg, FromReg);
172
173 Observer.finishedChangingAllUsesOfReg();
174}
175
176void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
177 MachineOperand &FromRegOp,
178 Register ToReg) const {
179 assert(FromRegOp.getParent() && "Expected an operand in an MI");
180 Observer.changingInstr(*FromRegOp.getParent());
181
182 FromRegOp.setReg(ToReg);
183
184 Observer.changedInstr(*FromRegOp.getParent());
185}
186
187void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
188 unsigned ToOpcode) const {
189 Observer.changingInstr(FromMI);
190
191 FromMI.setDesc(Builder.getTII().get(ToOpcode));
192
193 Observer.changedInstr(FromMI);
194}
195
196const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
197 return RBI->getRegBank(Reg, MRI, *TRI);
198}
199
200void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
201 if (RegBank)
202 MRI.setRegBank(Reg, *RegBank);
203}
204
205bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
206 if (matchCombineCopy(MI)) {
207 applyCombineCopy(MI);
208 return true;
209 }
210 return false;
211}
212bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
213 if (MI.getOpcode() != TargetOpcode::COPY)
214 return false;
215 Register DstReg = MI.getOperand(0).getReg();
216 Register SrcReg = MI.getOperand(1).getReg();
217 return canReplaceReg(DstReg, SrcReg, MRI);
218}
219void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
220 Register DstReg = MI.getOperand(0).getReg();
221 Register SrcReg = MI.getOperand(1).getReg();
222 MI.eraseFromParent();
223 replaceRegWith(MRI, DstReg, SrcReg);
224}
225
228 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
229 "Invalid instruction");
230 bool IsUndef = true;
231 MachineInstr *Undef = nullptr;
232
233 // Walk over all the operands of concat vectors and check if they are
234 // build_vector themselves or undef.
235 // Then collect their operands in Ops.
236 for (const MachineOperand &MO : MI.uses()) {
237 Register Reg = MO.getReg();
238 MachineInstr *Def = MRI.getVRegDef(Reg);
239 assert(Def && "Operand not defined");
240 if (!MRI.hasOneNonDBGUse(Reg))
241 return false;
242 switch (Def->getOpcode()) {
243 case TargetOpcode::G_BUILD_VECTOR:
244 IsUndef = false;
245 // Remember the operands of the build_vector to fold
246 // them into the yet-to-build flattened concat vectors.
247 for (const MachineOperand &BuildVecMO : Def->uses())
248 Ops.push_back(BuildVecMO.getReg());
249 break;
250 case TargetOpcode::G_IMPLICIT_DEF: {
251 LLT OpType = MRI.getType(Reg);
252 // Keep one undef value for all the undef operands.
253 if (!Undef) {
254 Builder.setInsertPt(*MI.getParent(), MI);
255 Undef = Builder.buildUndef(OpType.getScalarType());
256 }
257 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
258 OpType.getScalarType() &&
259 "All undefs should have the same type");
260 // Break the undef vector in as many scalar elements as needed
261 // for the flattening.
262 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
263 EltIdx != EltEnd; ++EltIdx)
264 Ops.push_back(Undef->getOperand(0).getReg());
265 break;
266 }
267 default:
268 return false;
269 }
270 }
271
272 // Check if the combine is illegal
273 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
274 if (!isLegalOrBeforeLegalizer(
275 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
276 return false;
277 }
278
279 if (IsUndef)
280 Ops.clear();
281
282 return true;
283}
286 // We determined that the concat_vectors can be flattened.
287 // Generate the flattened build_vector.
288 Register DstReg = MI.getOperand(0).getReg();
289 Builder.setInsertPt(*MI.getParent(), MI);
290 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
291
292 // Note: IsUndef is sort of redundant. We could have determined it by
293 // checking that all Ops are undef. Alternatively, we could have
294 // generated a build_vector of undefs and relied on another combine to
295 // clean that up. For now, given we already gather this information
296 // in matchCombineConcatVectors, just save compile time and issue the
297 // right thing.
298 if (Ops.empty())
299 Builder.buildUndef(NewDstReg);
300 else
301 Builder.buildBuildVector(NewDstReg, Ops);
302 MI.eraseFromParent();
303 replaceRegWith(MRI, DstReg, NewDstReg);
304}
305
308 if (matchCombineShuffleVector(MI, Ops)) {
310 return true;
311 }
312 return false;
313}
314
317 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
318 "Invalid instruction kind");
319 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
320 Register Src1 = MI.getOperand(1).getReg();
321 LLT SrcType = MRI.getType(Src1);
322 // As bizarre as it may look, shuffle vector can actually produce
323 // scalar! This is because at the IR level a <1 x ty> shuffle
324 // vector is perfectly valid.
325 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
326 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
327
328 // If the resulting vector is smaller than the size of the source
329 // vectors being concatenated, we won't be able to replace the
330 // shuffle vector into a concat_vectors.
331 //
332 // Note: We may still be able to produce a concat_vectors fed by
333 // extract_vector_elt and so on. It is less clear that would
334 // be better though, so don't bother for now.
335 //
336 // If the destination is a scalar, the size of the sources doesn't
337 // matter. We will lower the shuffle to a plain copy. This will
338 // work only if the source and destination have the same size. But
339 // that's covered by the next condition.
340 //
341 // TODO: If the sizes of the source and destination don't match,
342 // we could still emit an extract vector element in that case.
343 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
344 return false;
345
346 // Check that the shuffle mask can be broken evenly between the
347 // different sources.
348 if (DstNumElts % SrcNumElts != 0)
349 return false;
350
351 // Mask length is a multiple of the source vector length.
352 // Check if the shuffle is some kind of concatenation of the input
353 // vectors.
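  // For illustration (sketch): with two <2 x s32> sources, the mask <0,1,2,3>
  // takes both elements of src1 followed by both elements of src2, i.e. a
  // plain concatenation of the two inputs.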
354 unsigned NumConcat = DstNumElts / SrcNumElts;
355 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
356 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
357 for (unsigned i = 0; i != DstNumElts; ++i) {
358 int Idx = Mask[i];
359 // Undef value.
360 if (Idx < 0)
361 continue;
362 // Ensure the indices in each SrcType sized piece are sequential and that
363 // the same source is used for the whole piece.
364 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
365 (ConcatSrcs[i / SrcNumElts] >= 0 &&
366 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
367 return false;
368 // Remember which source this index came from.
369 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
370 }
371
372 // The shuffle is concatenating multiple vectors together.
373 // Collect the different operands for that.
374 Register UndefReg;
375 Register Src2 = MI.getOperand(2).getReg();
376 for (auto Src : ConcatSrcs) {
377 if (Src < 0) {
378 if (!UndefReg) {
379 Builder.setInsertPt(*MI.getParent(), MI);
380 UndefReg = Builder.buildUndef(SrcType).getReg(0);
381 }
382 Ops.push_back(UndefReg);
383 } else if (Src == 0)
384 Ops.push_back(Src1);
385 else
386 Ops.push_back(Src2);
387 }
388 return true;
389}
390
392 const ArrayRef<Register> Ops) {
393 Register DstReg = MI.getOperand(0).getReg();
394 Builder.setInsertPt(*MI.getParent(), MI);
395 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
396
397 if (Ops.size() == 1)
398 Builder.buildCopy(NewDstReg, Ops[0]);
399 else
400 Builder.buildMergeLikeInstr(NewDstReg, Ops);
401
402 MI.eraseFromParent();
403 replaceRegWith(MRI, DstReg, NewDstReg);
404}
405
407 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
408 "Invalid instruction kind");
409
410 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
411 return Mask.size() == 1;
412}
413
415 Register DstReg = MI.getOperand(0).getReg();
416 Builder.setInsertPt(*MI.getParent(), MI);
417
418 int I = MI.getOperand(3).getShuffleMask()[0];
419 Register Src1 = MI.getOperand(1).getReg();
420 LLT Src1Ty = MRI.getType(Src1);
421 int Src1NumElts = Src1Ty.isVector() ? Src1Ty.getNumElements() : 1;
422 Register SrcReg;
423 if (I >= Src1NumElts) {
424 SrcReg = MI.getOperand(2).getReg();
425 I -= Src1NumElts;
426 } else if (I >= 0)
427 SrcReg = Src1;
428
429 if (I < 0)
430 Builder.buildUndef(DstReg);
431 else if (!MRI.getType(SrcReg).isVector())
432 Builder.buildCopy(DstReg, SrcReg);
433 else
435
436 MI.eraseFromParent();
437}
438
439namespace {
440
441/// Select a preference between two uses. CurrentUse is the current preference
442/// while the *ForCandidate arguments describe the candidate under
443/// consideration.
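/// For illustration: if a load feeds both a G_ANYEXT and a G_SEXT to s32, the
/// G_SEXT use is preferred, since the load can then become a G_SEXTLOAD and
/// the remaining G_ANYEXT can be rewritten in terms of it.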
443PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
444 PreferredTuple &CurrentUse,
445 const LLT TyForCandidate,
446 unsigned OpcodeForCandidate,
447 MachineInstr *MIForCandidate) {
448 if (!CurrentUse.Ty.isValid()) {
449 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
450 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
451 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
452 return CurrentUse;
453 }
454
455 // We permit the extend to hoist through basic blocks but this is only
456 // sensible if the target has extending loads. If you end up lowering back
457 // into a load and extend during the legalizer then the end result is
458 // hoisting the extend up to the load.
459
460 // Prefer defined extensions to undefined extensions as these are more
461 // likely to reduce the number of instructions.
462 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
463 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
464 return CurrentUse;
465 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
466 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
467 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
468
469 // Prefer sign extensions to zero extensions, as sign extensions tend to be
470 // more expensive and so benefit more from being folded into the load.
471 // Don't do this if the load is already a zero-extend load though, otherwise
472 // we'll rewrite a zero-extend load into a sign-extend later.
473 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
474 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
475 OpcodeForCandidate == TargetOpcode::G_ZEXT)
476 return CurrentUse;
477 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
478 OpcodeForCandidate == TargetOpcode::G_SEXT)
479 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
480 }
481
482 // This is potentially target specific. We've chosen the largest type
483 // because G_TRUNC is usually free. One potential catch with this is that
484 // some targets have a reduced number of larger registers than smaller
485 // registers and this choice potentially increases the live-range for the
486 // larger value.
487 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
488 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
489 }
490 return CurrentUse;
491}
492
493/// Find a suitable place to insert some instructions and insert them. This
494/// function accounts for special cases like inserting before a PHI node.
495/// The current strategy for inserting before PHI's is to duplicate the
496/// instructions for each predecessor. However, while that's ok for G_TRUNC
497/// on most targets since it generally requires no code, other targets/cases may
498/// want to try harder to find a dominating block.
499static void InsertInsnsWithoutSideEffectsBeforeUse(
500 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
501 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
502 MachineOperand &UseMO)>
503 Inserter) {
504 MachineInstr &UseMI = *UseMO.getParent();
505
506 MachineBasicBlock *InsertBB = UseMI.getParent();
507
508 // If the use is a PHI then we want the predecessor block instead.
509 if (UseMI.isPHI()) {
510 MachineOperand *PredBB = std::next(&UseMO);
511 InsertBB = PredBB->getMBB();
512 }
513
514 // If the block is the same block as the def then we want to insert just after
515 // the def instead of at the start of the block.
516 if (InsertBB == DefMI.getParent()) {
518 Inserter(InsertBB, std::next(InsertPt), UseMO);
519 return;
520 }
521
522 // Otherwise we want the start of the BB
523 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
524}
525} // end anonymous namespace
526
527bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
528 PreferredTuple Preferred;
529 if (matchCombineExtendingLoads(MI, Preferred)) {
530 applyCombineExtendingLoads(MI, Preferred);
531 return true;
532 }
533 return false;
534}
535
536static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
537 unsigned CandidateLoadOpc;
538 switch (ExtOpc) {
539 case TargetOpcode::G_ANYEXT:
540 CandidateLoadOpc = TargetOpcode::G_LOAD;
541 break;
542 case TargetOpcode::G_SEXT:
543 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
544 break;
545 case TargetOpcode::G_ZEXT:
546 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
547 break;
548 default:
549 llvm_unreachable("Unexpected extend opc");
550 }
551 return CandidateLoadOpc;
552}
553
554bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
555 PreferredTuple &Preferred) {
556 // We match the loads and follow the uses to the extend instead of matching
557 // the extends and following the def to the load. This is because the load
558 // must remain in the same position for correctness (unless we also add code
559 // to find a safe place to sink it) whereas the extend is freely movable.
560 // It also prevents us from duplicating the load for the volatile case or just
561 // for performance.
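  // The overall transform this enables is, roughly (sketch; types are
  // illustrative only):
  //   %v:_(s8)  = G_LOAD %ptr
  //   %e:_(s32) = G_SEXT %v:_(s8)
  // ==>
  //   %e:_(s32) = G_SEXTLOAD %ptr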
562 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
563 if (!LoadMI)
564 return false;
565
566 Register LoadReg = LoadMI->getDstReg();
567
568 LLT LoadValueTy = MRI.getType(LoadReg);
569 if (!LoadValueTy.isScalar())
570 return false;
571
572 // Most architectures are going to legalize loads narrower than s8 into at least
573 // a 1 byte load, and the MMOs can only describe memory accesses in multiples of bytes.
574 // If we try to perform extload combining on those, we can end up with
575 // %a(s8) = extload %ptr (load 1 byte from %ptr)
576 // ... which is an illegal extload instruction.
577 if (LoadValueTy.getSizeInBits() < 8)
578 return false;
579
580 // For non power-of-2 types, they will very likely be legalized into multiple
581 // loads. Don't bother trying to match them into extending loads.
582 if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
583 return false;
584
585 // Find the preferred type aside from the any-extends (unless it's the only
586 // one) and non-extending ops. We'll emit an extending load to that type
587 // and emit a variant of (extend (trunc X)) for the others according to the
588 // relative type sizes. At the same time, pick an extend to use based on the
589 // extend involved in the chosen type.
590 unsigned PreferredOpcode =
591 isa<GLoad>(&MI)
592 ? TargetOpcode::G_ANYEXT
593 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
594 Preferred = {LLT(), PreferredOpcode, nullptr};
595 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
596 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
597 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
598 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
599 const auto &MMO = LoadMI->getMMO();
600 // For atomics, only form anyextending loads.
601 if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
602 continue;
603 // Check for legality.
604 if (!isPreLegalize()) {
605 LegalityQuery::MemDesc MMDesc(MMO);
606 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
607 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
608 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
609 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
610 .Action != LegalizeActions::Legal)
611 continue;
612 }
613 Preferred = ChoosePreferredUse(MI, Preferred,
614 MRI.getType(UseMI.getOperand(0).getReg()),
615 UseMI.getOpcode(), &UseMI);
616 }
617 }
618
619 // There were no extends
620 if (!Preferred.MI)
621 return false;
622 // It should be impossible to choose an extend without selecting a different
623 // type since by definition the result of an extend is larger.
624 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
625
626 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
627 return true;
628}
629
630void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
631 PreferredTuple &Preferred) {
632 // Rewrite the load to the chosen extending load.
633 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
634
635 // Inserter to insert a truncate back to the original type at a given point
636 // with some basic CSE to limit truncate duplication to one per BB.
637 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
638 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
639 MachineBasicBlock::iterator InsertBefore,
640 MachineOperand &UseMO) {
641 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
642 if (PreviouslyEmitted) {
644 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
646 return;
647 }
648
649 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
650 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
651 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
652 EmittedInsns[InsertIntoBB] = NewMI;
653 replaceRegOpWith(MRI, UseMO, NewDstReg);
654 };
655
656 Observer.changingInstr(MI);
657 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
658 MI.setDesc(Builder.getTII().get(LoadOpc));
659
660 // Rewrite all the uses to fix up the types.
661 auto &LoadValue = MI.getOperand(0);
662 SmallVector<MachineOperand *, 4> Uses;
663 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
664 Uses.push_back(&UseMO);
665
666 for (auto *UseMO : Uses) {
667 MachineInstr *UseMI = UseMO->getParent();
668
669 // If the extend is compatible with the preferred extend then we should fix
670 // up the type and extend so that it uses the preferred use.
671 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
672 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
673 Register UseDstReg = UseMI->getOperand(0).getReg();
674 MachineOperand &UseSrcMO = UseMI->getOperand(1);
675 const LLT UseDstTy = MRI.getType(UseDstReg);
676 if (UseDstReg != ChosenDstReg) {
677 if (Preferred.Ty == UseDstTy) {
678 // If the use has the same type as the preferred use, then merge
679 // the vregs and erase the extend. For example:
680 // %1:_(s8) = G_LOAD ...
681 // %2:_(s32) = G_SEXT %1(s8)
682 // %3:_(s32) = G_ANYEXT %1(s8)
683 // ... = ... %3(s32)
684 // rewrites to:
685 // %2:_(s32) = G_SEXTLOAD ...
686 // ... = ... %2(s32)
687 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
689 UseMO->getParent()->eraseFromParent();
690 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
691 // If the preferred size is smaller, then keep the extend but extend
692 // from the result of the extending load. For example:
693 // %1:_(s8) = G_LOAD ...
694 // %2:_(s32) = G_SEXT %1(s8)
695 // %3:_(s64) = G_ANYEXT %1(s8)
696 // ... = ... %3(s64)
697 // rewrites to:
698 // %2:_(s32) = G_SEXTLOAD ...
699 // %3:_(s64) = G_ANYEXT %2:_(s32)
700 // ... = ... %3(s64)
701 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
702 } else {
703 // If the preferred size is large, then insert a truncate. For
704 // example:
705 // %1:_(s8) = G_LOAD ...
706 // %2:_(s64) = G_SEXT %1(s8)
707 // %3:_(s32) = G_ZEXT %1(s8)
708 // ... = ... %3(s32)
709 // rewrites to:
710 // %2:_(s64) = G_SEXTLOAD ...
711 // %4:_(s8) = G_TRUNC %2:_(s64)
712 // %3:_(s32) = G_ZEXT %4:_(s8)
713 // ... = ... %3(s32)
714 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
715 InsertTruncAt);
716 }
717 continue;
718 }
719 // The use is (one of) the uses of the preferred use we chose earlier.
720 // We're going to update the load to def this value later so just erase
721 // the old extend.
723 UseMO->getParent()->eraseFromParent();
724 continue;
725 }
726
727 // The use isn't an extend. Truncate back to the type we originally loaded.
728 // This is free on many targets.
729 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
730 }
731
732 MI.getOperand(0).setReg(ChosenDstReg);
733 Observer.changedInstr(MI);
734}
735
736bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
737 BuildFnTy &MatchInfo) {
738 assert(MI.getOpcode() == TargetOpcode::G_AND);
739
740 // If we have the following code:
741 // %mask = G_CONSTANT 255
742 // %ld = G_LOAD %ptr, (load s16)
743 // %and = G_AND %ld, %mask
744 //
745 // Try to fold it into
746 // %ld = G_ZEXTLOAD %ptr, (load s8)
747
748 Register Dst = MI.getOperand(0).getReg();
749 if (MRI.getType(Dst).isVector())
750 return false;
751
752 auto MaybeMask =
753 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
754 if (!MaybeMask)
755 return false;
756
757 APInt MaskVal = MaybeMask->Value;
758
759 if (!MaskVal.isMask())
760 return false;
761
762 Register SrcReg = MI.getOperand(1).getReg();
763 // Don't use getOpcodeDef() here since intermediate instructions may have
764 // multiple users.
765 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
766 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
767 return false;
768
769 Register LoadReg = LoadMI->getDstReg();
770 LLT RegTy = MRI.getType(LoadReg);
771 Register PtrReg = LoadMI->getPointerReg();
772 unsigned RegSize = RegTy.getSizeInBits();
773 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
774 unsigned MaskSizeBits = MaskVal.countr_one();
775
776 // The mask may not be larger than the in-memory type, as it might cover sign
777 // extended bits
778 if (MaskSizeBits > LoadSizeBits.getValue())
779 return false;
780
781 // If the mask covers the whole destination register, there's nothing to
782 // extend
783 if (MaskSizeBits >= RegSize)
784 return false;
785
786 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
787 // at least byte loads. Avoid creating such loads here
788 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
789 return false;
790
791 const MachineMemOperand &MMO = LoadMI->getMMO();
792 LegalityQuery::MemDesc MemDesc(MMO);
793
794 // Don't modify the memory access size if this is atomic/volatile, but we can
795 // still adjust the opcode to indicate the high bit behavior.
796 if (LoadMI->isSimple())
797 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
798 else if (LoadSizeBits.getValue() > MaskSizeBits ||
799 LoadSizeBits.getValue() == RegSize)
800 return false;
801
802 // TODO: Could check if it's legal with the reduced or original memory size.
803 if (!isLegalOrBeforeLegalizer(
804 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
805 return false;
806
807 MatchInfo = [=](MachineIRBuilder &B) {
808 B.setInstrAndDebugLoc(*LoadMI);
809 auto &MF = B.getMF();
810 auto PtrInfo = MMO.getPointerInfo();
811 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
812 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
813 LoadMI->eraseFromParent();
814 };
815 return true;
816}
817
818bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
819 const MachineInstr &UseMI) {
820 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
821 "shouldn't consider debug uses");
822 assert(DefMI.getParent() == UseMI.getParent());
823 if (&DefMI == &UseMI)
824 return true;
825 const MachineBasicBlock &MBB = *DefMI.getParent();
826 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
827 return &MI == &DefMI || &MI == &UseMI;
828 });
829 if (DefOrUse == MBB.end())
830 llvm_unreachable("Block must contain both DefMI and UseMI!");
831 return &*DefOrUse == &DefMI;
832}
833
834bool CombinerHelper::dominates(const MachineInstr &DefMI,
835 const MachineInstr &UseMI) {
836 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
837 "shouldn't consider debug uses");
838 if (MDT)
839 return MDT->dominates(&DefMI, &UseMI);
840 else if (DefMI.getParent() != UseMI.getParent())
841 return false;
842
843 return isPredecessor(DefMI, UseMI);
844}
845
846bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
847 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
848 Register SrcReg = MI.getOperand(1).getReg();
849 Register LoadUser = SrcReg;
850
851 if (MRI.getType(SrcReg).isVector())
852 return false;
853
854 Register TruncSrc;
855 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
856 LoadUser = TruncSrc;
857
858 uint64_t SizeInBits = MI.getOperand(2).getImm();
859 // If the source is a G_SEXTLOAD from the same bit width, then we don't
860 // need any extend at all, just a truncate.
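  // For illustration (sketch):
  //   %ld:_(s32) = G_SEXTLOAD %ptr :: (load (s8))
  //   %r:_(s32)  = G_SEXT_INREG %ld:_(s32), 8
  // Here %r is already identical to %ld, so the G_SEXT_INREG can be replaced
  // by a plain copy.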
861 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
862 // If truncating more than the original extended value, abort.
863 auto LoadSizeBits = LoadMI->getMemSizeInBits();
864 if (TruncSrc &&
865 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
866 return false;
867 if (LoadSizeBits == SizeInBits)
868 return true;
869 }
870 return false;
871}
872
873void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
874 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
876 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
877 MI.eraseFromParent();
878}
879
880bool CombinerHelper::matchSextInRegOfLoad(
881 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
882 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
883
884 Register DstReg = MI.getOperand(0).getReg();
885 LLT RegTy = MRI.getType(DstReg);
886
887 // Only supports scalars for now.
888 if (RegTy.isVector())
889 return false;
890
891 Register SrcReg = MI.getOperand(1).getReg();
892 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
893 if (!LoadDef || !MRI.hasOneNonDBGUse(DstReg))
894 return false;
895
896 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
897
898 // If the sign extend extends from a narrower width than the load's width,
899 // then we can narrow the load width when we combine to a G_SEXTLOAD.
900 // Avoid widening the load at all.
901 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
902
903 // Don't generate G_SEXTLOADs with a < 1 byte width.
904 if (NewSizeBits < 8)
905 return false;
906 // Don't bother creating a non-power-2 sextload, it will likely be broken up
907 // anyway for most targets.
908 if (!isPowerOf2_32(NewSizeBits))
909 return false;
910
911 const MachineMemOperand &MMO = LoadDef->getMMO();
912 LegalityQuery::MemDesc MMDesc(MMO);
913
914 // Don't modify the memory access size if this is atomic/volatile, but we can
915 // still adjust the opcode to indicate the high bit behavior.
916 if (LoadDef->isSimple())
917 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
918 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
919 return false;
920
921 // TODO: Could check if it's legal with the reduced or original memory size.
922 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
923 {MRI.getType(LoadDef->getDstReg()),
924 MRI.getType(LoadDef->getPointerReg())},
925 {MMDesc}}))
926 return false;
927
928 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
929 return true;
930}
931
932void CombinerHelper::applySextInRegOfLoad(
933 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
934 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
935 Register LoadReg;
936 unsigned ScalarSizeBits;
937 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
938 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
939
940 // If we have the following:
941 // %ld = G_LOAD %ptr, (load 2)
942 // %ext = G_SEXT_INREG %ld, 8
943 // ==>
944 // %ld = G_SEXTLOAD %ptr (load 1)
945
946 auto &MMO = LoadDef->getMMO();
948 auto &MF = Builder.getMF();
949 auto PtrInfo = MMO.getPointerInfo();
950 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
951 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
952 LoadDef->getPointerReg(), *NewMMO);
953 MI.eraseFromParent();
954}
955
956static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
957 if (Ty.isVector())
958 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
959 Ty.getNumElements());
960 return IntegerType::get(C, Ty.getSizeInBits());
961}
962
963/// Return true if 'MI' is a load or a store that may fold its address
964/// operand into the load / store addressing mode.
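/// E.g. (illustrative sketch) on a target that supports [reg + imm]
/// addressing:
///   %a:_(p0)  = G_PTR_ADD %base, 16
///   %v:_(s32) = G_LOAD %a:_(p0)
/// can fold the constant offset into the load's addressing mode.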
965static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI,
966 MachineRegisterInfo &MRI) {
967 TargetLowering::AddrMode AM;
968 auto *MF = MI->getMF();
969 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
970 if (!Addr)
971 return false;
972
973 AM.HasBaseReg = true;
974 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
975 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
976 else
977 AM.Scale = 1; // [reg +/- reg]
978
979 return TLI.isLegalAddressingMode(
980 MF->getDataLayout(), AM,
981 getTypeForLLT(MI->getMMO().getMemoryType(),
982 MF->getFunction().getContext()),
983 MI->getMMO().getAddrSpace());
984}
985
986static unsigned getIndexedOpc(unsigned LdStOpc) {
987 switch (LdStOpc) {
988 case TargetOpcode::G_LOAD:
989 return TargetOpcode::G_INDEXED_LOAD;
990 case TargetOpcode::G_STORE:
991 return TargetOpcode::G_INDEXED_STORE;
992 case TargetOpcode::G_ZEXTLOAD:
993 return TargetOpcode::G_INDEXED_ZEXTLOAD;
994 case TargetOpcode::G_SEXTLOAD:
995 return TargetOpcode::G_INDEXED_SEXTLOAD;
996 default:
997 llvm_unreachable("Unexpected opcode");
998 }
999}
1000
1001bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
1002 // Check for legality.
1003 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1004 LLT Ty = MRI.getType(LdSt.getReg(0));
1005 LLT MemTy = LdSt.getMMO().getMemoryType();
1006 SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
1007 {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
1008 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
1009 SmallVector<LLT> OpTys;
1010 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1011 OpTys = {PtrTy, Ty, Ty};
1012 else
1013 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1014
1015 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1016 return isLegal(Q);
1017}
1018
1020 "post-index-use-threshold", cl::Hidden, cl::init(32),
1021 cl::desc("Number of uses of a base pointer to check before it is no longer "
1022 "considered for post-indexing."));
1023
1024bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1025 Register &Base, Register &Offset,
1026 bool &RematOffset) {
1027 // We're looking for the following pattern, for either load or store:
1028 // %baseptr:_(p0) = ...
1029 // G_STORE %val(s64), %baseptr(p0)
1030 // %offset:_(s64) = G_CONSTANT i64 -256
1031 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
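  // If all the checks below pass, this becomes a post-indexed access along the
  // lines of (sketch):
  //   %new_addr:_(p0) = G_INDEXED_STORE %val(s64), %baseptr(p0), %offset(s64), 0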
1032 const auto &TLI = getTargetLowering();
1033
1034 Register Ptr = LdSt.getPointerReg();
1035 // If the store is the only use, don't bother.
1036 if (MRI.hasOneNonDBGUse(Ptr))
1037 return false;
1038
1039 if (!isIndexedLoadStoreLegal(LdSt))
1040 return false;
1041
1042 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1043 return false;
1044
1045 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1046 auto *PtrDef = MRI.getVRegDef(Ptr);
1047
1048 unsigned NumUsesChecked = 0;
1049 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1050 if (++NumUsesChecked > PostIndexUseThreshold)
1051 return false; // Try to avoid exploding compile time.
1052
1053 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1054 // The use itself might be dead. This can happen during combines if DCE
1055 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1056 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1057 continue;
1058
1059 // Check that the user of this isn't the store, otherwise we'd be generating
1060 // an indexed store defining its own use.
1061 if (StoredValDef == &Use)
1062 continue;
1063
1064 Offset = PtrAdd->getOffsetReg();
1065 if (!ForceLegalIndexing &&
1066 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1067 /*IsPre*/ false, MRI))
1068 continue;
1069
1070 // Make sure the offset calculation is before the potentially indexed op.
1071 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1072 RematOffset = false;
1073 if (!dominates(*OffsetDef, LdSt)) {
1074 // If the offset however is just a G_CONSTANT, we can always just
1075 // rematerialize it where we need it.
1076 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1077 continue;
1078 RematOffset = true;
1079 }
1080
1081 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1082 if (&BasePtrUse == PtrDef)
1083 continue;
1084
1085 // If the user is a later load/store that can be post-indexed, then don't
1086 // combine this one.
1087 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1088 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1089 dominates(LdSt, *BasePtrLdSt) &&
1090 isIndexedLoadStoreLegal(*BasePtrLdSt))
1091 return false;
1092
1093 // Now we're looking for the key G_PTR_ADD instruction, which contains
1094 // the offset add that we want to fold.
1095 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1096 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1097 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1098 // If the use is in a different block, then we may produce worse code
1099 // due to the extra register pressure.
1100 if (BaseUseUse.getParent() != LdSt.getParent())
1101 return false;
1102
1103 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1104 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1105 return false;
1106 }
1107 if (!dominates(LdSt, BasePtrUse))
1108 return false; // All uses must be dominated by the load/store.
1109 }
1110 }
1111
1112 Addr = PtrAdd->getReg(0);
1113 Base = PtrAdd->getBaseReg();
1114 return true;
1115 }
1116
1117 return false;
1118}
1119
1120bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1121 Register &Base, Register &Offset) {
1122 auto &MF = *LdSt.getParent()->getParent();
1123 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1124
1125 Addr = LdSt.getPointerReg();
1128 return false;
1129
1130 if (!ForceLegalIndexing &&
1131 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1132 return false;
1133
1134 if (!isIndexedLoadStoreLegal(LdSt))
1135 return false;
1136
1137 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1138 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1139 return false;
1140
1141 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1142 // Would require a copy.
1143 if (Base == St->getValueReg())
1144 return false;
1145
1146 // We're expecting one use of Addr in MI, but it could also be the
1147 // value stored, which isn't actually dominated by the instruction.
1148 if (St->getValueReg() == Addr)
1149 return false;
1150 }
1151
1152 // Avoid increasing cross-block register pressure.
1153 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1154 if (AddrUse.getParent() != LdSt.getParent())
1155 return false;
1156
1157 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1158 // That might allow us to end base's liveness here by adjusting the constant.
1159 bool RealUse = false;
1160 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1161 if (!dominates(LdSt, AddrUse))
1162 return false; // All uses must be dominated by the load/store.
1163
1164 // If Ptr may be folded in addressing mode of other use, then it's
1165 // not profitable to do this transformation.
1166 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1167 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1168 RealUse = true;
1169 } else {
1170 RealUse = true;
1171 }
1172 }
1173 return RealUse;
1174}
1175
1176bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
1177 BuildFnTy &MatchInfo) {
1178 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1179
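  // The overall transform being attempted is, roughly (sketch):
  //   %vec:_(<4 x s32>) = G_LOAD %ptr
  //   %elt:_(s32) = G_EXTRACT_VECTOR_ELT %vec, %idx
  // ==>
  //   %eltptr:_(p0) = <pointer to element %idx of %ptr>
  //   %elt:_(s32) = G_LOAD %eltptr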
1180 // Check if there is a load that defines the vector being extracted from.
1181 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1182 if (!LoadMI)
1183 return false;
1184
1185 Register Vector = MI.getOperand(1).getReg();
1186 LLT VecEltTy = MRI.getType(Vector).getElementType();
1187
1188 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1189
1190 // Checking whether we should reduce the load width.
1192 return false;
1193
1194 // Check if the defining load is simple.
1195 if (!LoadMI->isSimple())
1196 return false;
1197
1198 // If the vector element type is not a multiple of a byte then we are unable
1199 // to correctly compute an address to load only the extracted element as a
1200 // scalar.
1201 if (!VecEltTy.isByteSized())
1202 return false;
1203
1204 // Check for load fold barriers between the extraction and the load.
1205 if (MI.getParent() != LoadMI->getParent())
1206 return false;
1207 const unsigned MaxIter = 20;
1208 unsigned Iter = 0;
1209 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1210 if (II->isLoadFoldBarrier())
1211 return false;
1212 if (Iter++ == MaxIter)
1213 return false;
1214 }
1215
1216 // Check if the new load that we are going to create is legal
1217 // if we are in the post-legalization phase.
1218 MachineMemOperand MMO = LoadMI->getMMO();
1219 Align Alignment = MMO.getAlign();
1220 MachinePointerInfo PtrInfo;
1222
1223 // Finding the appropriate PtrInfo if offset is a known constant.
1224 // This is required to create the memory operand for the narrowed load.
1225 // This machine memory operand object helps us infer about legality
1226 // before we proceed to combine the instruction.
1227 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1228 int Elt = CVal->getZExtValue();
1229 // FIXME: should be (ABI size)*Elt.
1230 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1231 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1232 } else {
1233 // Discard the pointer info except the address space because the memory
1234 // operand can't represent this new access since the offset is variable.
1235 Offset = VecEltTy.getSizeInBits() / 8;
1237 }
1238
1239 Alignment = commonAlignment(Alignment, Offset);
1240
1241 Register VecPtr = LoadMI->getPointerReg();
1242 LLT PtrTy = MRI.getType(VecPtr);
1243
1244 MachineFunction &MF = *MI.getMF();
1245 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1246
1247 LegalityQuery::MemDesc MMDesc(*NewMMO);
1248
1249 LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};
1250
1251 if (!isLegalOrBeforeLegalizer(Q))
1252 return false;
1253
1254 // Load must be allowed and fast on the target.
1256 auto &DL = MF.getDataLayout();
1257 unsigned Fast = 0;
1258 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1259 &Fast) ||
1260 !Fast)
1261 return false;
1262
1263 Register Result = MI.getOperand(0).getReg();
1264 Register Index = MI.getOperand(2).getReg();
1265
1266 MatchInfo = [=](MachineIRBuilder &B) {
1267 GISelObserverWrapper DummyObserver;
1268 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1269 // Get pointer to the vector element.
1270 Register finalPtr = Helper.getVectorElementPointer(
1271 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1272 Index);
1273 // New G_LOAD instruction.
1274 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1275 // Remove original GLOAD instruction.
1276 LoadMI->eraseFromParent();
1277 };
1278
1279 return true;
1280}
1281
1282bool CombinerHelper::matchCombineIndexedLoadStore(
1283 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1284 auto &LdSt = cast<GLoadStore>(MI);
1285
1286 if (LdSt.isAtomic())
1287 return false;
1288
1289 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1290 MatchInfo.Offset);
1291 if (!MatchInfo.IsPre &&
1292 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1293 MatchInfo.Offset, MatchInfo.RematOffset))
1294 return false;
1295
1296 return true;
1297}
1298
1299void CombinerHelper::applyCombineIndexedLoadStore(
1300 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
1301 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1303 unsigned Opcode = MI.getOpcode();
1304 bool IsStore = Opcode == TargetOpcode::G_STORE;
1305 unsigned NewOpcode = getIndexedOpc(Opcode);
1306
1307 // If the offset constant didn't happen to dominate the load/store, we can
1308 // just clone it as needed.
1309 if (MatchInfo.RematOffset) {
1310 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1311 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1312 *OldCst->getOperand(1).getCImm());
1313 MatchInfo.Offset = NewCst.getReg(0);
1314 }
1315
1316 auto MIB = Builder.buildInstr(NewOpcode);
1317 if (IsStore) {
1318 MIB.addDef(MatchInfo.Addr);
1319 MIB.addUse(MI.getOperand(0).getReg());
1320 } else {
1321 MIB.addDef(MI.getOperand(0).getReg());
1322 MIB.addDef(MatchInfo.Addr);
1323 }
1324
1325 MIB.addUse(MatchInfo.Base);
1326 MIB.addUse(MatchInfo.Offset);
1327 MIB.addImm(MatchInfo.IsPre);
1328 MIB->cloneMemRefs(*MI.getMF(), MI);
1329 MI.eraseFromParent();
1330 AddrDef.eraseFromParent();
1331
1332 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1333}
1334
1335bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
1336 MachineInstr *&OtherMI) {
1337 unsigned Opcode = MI.getOpcode();
1338 bool IsDiv, IsSigned;
1339
1340 switch (Opcode) {
1341 default:
1342 llvm_unreachable("Unexpected opcode!");
1343 case TargetOpcode::G_SDIV:
1344 case TargetOpcode::G_UDIV: {
1345 IsDiv = true;
1346 IsSigned = Opcode == TargetOpcode::G_SDIV;
1347 break;
1348 }
1349 case TargetOpcode::G_SREM:
1350 case TargetOpcode::G_UREM: {
1351 IsDiv = false;
1352 IsSigned = Opcode == TargetOpcode::G_SREM;
1353 break;
1354 }
1355 }
1356
1357 Register Src1 = MI.getOperand(1).getReg();
1358 unsigned DivOpcode, RemOpcode, DivremOpcode;
1359 if (IsSigned) {
1360 DivOpcode = TargetOpcode::G_SDIV;
1361 RemOpcode = TargetOpcode::G_SREM;
1362 DivremOpcode = TargetOpcode::G_SDIVREM;
1363 } else {
1364 DivOpcode = TargetOpcode::G_UDIV;
1365 RemOpcode = TargetOpcode::G_UREM;
1366 DivremOpcode = TargetOpcode::G_UDIVREM;
1367 }
1368
1369 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1370 return false;
1371
1372 // Combine:
1373 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1374 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1375 // into:
1376 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1377
1378 // Combine:
1379 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1380 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1381 // into:
1382 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1383
1384 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1385 if (MI.getParent() == UseMI.getParent() &&
1386 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1387 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1388 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1389 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1390 OtherMI = &UseMI;
1391 return true;
1392 }
1393 }
1394
1395 return false;
1396}
1397
1398void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1399 MachineInstr *&OtherMI) {
1400 unsigned Opcode = MI.getOpcode();
1401 assert(OtherMI && "OtherMI shouldn't be empty.");
1402
1403 Register DestDivReg, DestRemReg;
1404 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1405 DestDivReg = MI.getOperand(0).getReg();
1406 DestRemReg = OtherMI->getOperand(0).getReg();
1407 } else {
1408 DestDivReg = OtherMI->getOperand(0).getReg();
1409 DestRemReg = MI.getOperand(0).getReg();
1410 }
1411
1412 bool IsSigned =
1413 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1414
1415 // Check which instruction is first in the block so we don't break def-use
1416 // deps by "moving" the instruction incorrectly. Also keep track of which
1417 // instruction is first so we pick its operands, avoiding use-before-def
1418 // bugs.
1419 MachineInstr *FirstInst;
1420 if (dominates(MI, *OtherMI)) {
1421 Builder.setInstrAndDebugLoc(MI);
1422 FirstInst = &MI;
1423 } else {
1424 Builder.setInstrAndDebugLoc(*OtherMI);
1425 FirstInst = OtherMI;
1426 }
1427
1428 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1429 : TargetOpcode::G_UDIVREM,
1430 {DestDivReg, DestRemReg},
1431 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1432 MI.eraseFromParent();
1433 OtherMI->eraseFromParent();
1434}
1435
1436bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1437 MachineInstr *&BrCond) {
1438 assert(MI.getOpcode() == TargetOpcode::G_BR);
1439
1440 // Try to match the following:
1441 // bb1:
1442 // G_BRCOND %c1, %bb2
1443 // G_BR %bb3
1444 // bb2:
1445 // ...
1446 // bb3:
1447
1448 // The above pattern does not have a fall through to the successor bb2, always
1449 // resulting in a branch no matter which path is taken. Here we try to find
1450 // and replace that pattern with a conditional branch to bb3 and a
1451 // fallthrough to bb2. This is generally better for branch predictors.
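  // After the rewrite the block ends up looking roughly like (sketch; the
  // condition is inverted by XORing it with the target's "true" value):
  //   bb1:
  //     %c1_inv = G_XOR %c1, <true>
  //     G_BRCOND %c1_inv, %bb3
  //   bb2: ; fallthrough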
1452
1453 MachineBasicBlock *MBB = MI.getParent();
1454 MachineBasicBlock::iterator BrIt(MI);
1455 if (BrIt == MBB->begin())
1456 return false;
1457 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1458
1459 BrCond = &*std::prev(BrIt);
1460 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1461 return false;
1462
1463 // Check that the next block is the conditional branch target. Also make sure
1464 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1465 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1466 return BrCondTarget != MI.getOperand(0).getMBB() &&
1467 MBB->isLayoutSuccessor(BrCondTarget);
1468}
1469
1470void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1471 MachineInstr *&BrCond) {
1472 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1474 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1475 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1476 // this to i1 only since we might not know for sure what kind of
1477 // compare generated the condition value.
1478 auto True = Builder.buildConstant(
1479 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1480 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1481
1482 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1484 MI.getOperand(0).setMBB(FallthroughBB);
1486
1487 // Change the conditional branch to use the inverted condition and
1488 // new target block.
1489 Observer.changingInstr(*BrCond);
1490 BrCond->getOperand(0).setReg(Xor.getReg(0));
1491 BrCond->getOperand(1).setMBB(BrTarget);
1492 Observer.changedInstr(*BrCond);
1493}
1494
1495
1496bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1497 MachineIRBuilder HelperBuilder(MI);
1498 GISelObserverWrapper DummyObserver;
1499 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1500 return Helper.lowerMemcpyInline(MI) ==
1501 LegalizerHelper::LegalizeResult::Legalized;
1502}
1503
1504bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1505 MachineIRBuilder HelperBuilder(MI);
1506 GISelObserverWrapper DummyObserver;
1507 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1508 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1509 LegalizerHelper::LegalizeResult::Legalized;
1510}
1511
1512static APFloat constantFoldFpUnary(const MachineInstr &MI,
1513 const MachineRegisterInfo &MRI,
1514 const APFloat &Val) {
1515 APFloat Result(Val);
1516 switch (MI.getOpcode()) {
1517 default:
1518 llvm_unreachable("Unexpected opcode!");
1519 case TargetOpcode::G_FNEG: {
1520 Result.changeSign();
1521 return Result;
1522 }
1523 case TargetOpcode::G_FABS: {
1524 Result.clearSign();
1525 return Result;
1526 }
1527 case TargetOpcode::G_FPTRUNC: {
1528 bool Unused;
1529 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1530 Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
1531 &Unused);
1532 return Result;
1533 }
1534 case TargetOpcode::G_FSQRT: {
1535 bool Unused;
1536 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1537 &Unused);
1538 Result = APFloat(sqrt(Result.convertToDouble()));
1539 break;
1540 }
1541 case TargetOpcode::G_FLOG2: {
1542 bool Unused;
1543 Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1544 &Unused);
1545 Result = APFloat(log2(Result.convertToDouble()));
1546 break;
1547 }
1548 }
1549 // Convert the result back to the semantics of `Val`. Otherwise,
1550 // `buildFConstant` will assert on a size mismatch. Only `G_FSQRT` and
1551 // `G_FLOG2` reach here.
1552 bool Unused;
1553 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1554 return Result;
1555}
1556
1557void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1558 const ConstantFP *Cst) {
1560 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1561 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1562 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1563 MI.eraseFromParent();
1564}
1565
1566bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1567 PtrAddChain &MatchInfo) {
1568 // We're trying to match the following pattern:
1569 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1570 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1571 // -->
1572 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1573
1574 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1575 return false;
1576
1577 Register Add2 = MI.getOperand(1).getReg();
1578 Register Imm1 = MI.getOperand(2).getReg();
1579 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1580 if (!MaybeImmVal)
1581 return false;
1582
1583 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1584 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1585 return false;
1586
1587 Register Base = Add2Def->getOperand(1).getReg();
1588 Register Imm2 = Add2Def->getOperand(2).getReg();
1589 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1590 if (!MaybeImm2Val)
1591 return false;
1592
1593 // Check if the new combined immediate forms an illegal addressing mode.
1594 // Do not combine if it was legal before but would get illegal.
1595 // To do so, we need to find a load/store user of the pointer to get
1596 // the access type.
1597 Type *AccessTy = nullptr;
1598 auto &MF = *MI.getMF();
1599 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1600 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1601 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1602 MF.getFunction().getContext());
1603 break;
1604 }
1605 }
1607 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1608 AMNew.BaseOffs = CombinedImm.getSExtValue();
1609 if (AccessTy) {
1610 AMNew.HasBaseReg = true;
1612 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1613 AMOld.HasBaseReg = true;
1614 unsigned AS = MRI.getType(Add2).getAddressSpace();
1615 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1616 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1617 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1618 return false;
1619 }
1620
1621 // Pass the combined immediate to the apply function.
1622 MatchInfo.Imm = AMNew.BaseOffs;
1623 MatchInfo.Base = Base;
1624 MatchInfo.Bank = getRegBank(Imm2);
1625 return true;
1626}
1627
1628void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1629 PtrAddChain &MatchInfo) {
1630 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1631 MachineIRBuilder MIB(MI);
1632 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1633 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1634 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1636 MI.getOperand(1).setReg(MatchInfo.Base);
1637 MI.getOperand(2).setReg(NewOffset.getReg(0));
1639}
1640
1641bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1642 RegisterImmPair &MatchInfo) {
1643 // We're trying to match the following pattern with any of
1644 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1645 // %t1 = SHIFT %base, G_CONSTANT imm1
1646 // %root = SHIFT %t1, G_CONSTANT imm2
1647 // -->
1648 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1649
1650 unsigned Opcode = MI.getOpcode();
1651 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1652 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1653 Opcode == TargetOpcode::G_USHLSAT) &&
1654 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1655
1656 Register Shl2 = MI.getOperand(1).getReg();
1657 Register Imm1 = MI.getOperand(2).getReg();
1658 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1659 if (!MaybeImmVal)
1660 return false;
1661
1662 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1663 if (Shl2Def->getOpcode() != Opcode)
1664 return false;
1665
1666 Register Base = Shl2Def->getOperand(1).getReg();
1667 Register Imm2 = Shl2Def->getOperand(2).getReg();
1668 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1669 if (!MaybeImm2Val)
1670 return false;
1671
1672 // Pass the combined immediate to the apply function.
1673 MatchInfo.Imm =
1674 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1675 MatchInfo.Reg = Base;
1676
1677 // There is no simple replacement for a saturating unsigned left shift that
1678 // exceeds the scalar size.
1679 if (Opcode == TargetOpcode::G_USHLSAT &&
1680 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1681 return false;
1682
1683 return true;
1684}
1685
1686void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1687 RegisterImmPair &MatchInfo) {
1688 unsigned Opcode = MI.getOpcode();
1689 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1690 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1691 Opcode == TargetOpcode::G_USHLSAT) &&
1692 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1693
1695 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1696 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1697 auto Imm = MatchInfo.Imm;
1698
1699 if (Imm >= ScalarSizeInBits) {
1700 // Any logical shift that exceeds scalar size will produce zero.
1701 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1702 Builder.buildConstant(MI.getOperand(0), 0);
1703 MI.eraseFromParent();
1704 return;
1705 }
1706 // Arithmetic shift and saturating signed left shift have no effect beyond
1707 // scalar size.
1708 Imm = ScalarSizeInBits - 1;
1709 }
1710
1711 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1712 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1714 MI.getOperand(1).setReg(MatchInfo.Reg);
1715 MI.getOperand(2).setReg(NewImm);
1717}
1718
1719bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1720 ShiftOfShiftedLogic &MatchInfo) {
1721 // We're trying to match the following pattern with any of
1722 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1723 // with any of G_AND/G_OR/G_XOR logic instructions.
1724 // %t1 = SHIFT %X, G_CONSTANT C0
1725 // %t2 = LOGIC %t1, %Y
1726 // %root = SHIFT %t2, G_CONSTANT C1
1727 // -->
1728 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1729 // %t4 = SHIFT %Y, G_CONSTANT C1
1730 // %root = LOGIC %t3, %t4
1731 unsigned ShiftOpcode = MI.getOpcode();
1732 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1733 ShiftOpcode == TargetOpcode::G_ASHR ||
1734 ShiftOpcode == TargetOpcode::G_LSHR ||
1735 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1736 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1737 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1738
1739 // Match a one-use bitwise logic op.
1740 Register LogicDest = MI.getOperand(1).getReg();
1741 if (!MRI.hasOneNonDBGUse(LogicDest))
1742 return false;
1743
1744 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1745 unsigned LogicOpcode = LogicMI->getOpcode();
1746 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1747 LogicOpcode != TargetOpcode::G_XOR)
1748 return false;
1749
1750 // Find a matching one-use shift by constant.
1751 const Register C1 = MI.getOperand(2).getReg();
1752 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
1753 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1754 return false;
1755
1756 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1757
1758 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1759 // The shift should match the previous one and should have only one use.
1760 if (MI->getOpcode() != ShiftOpcode ||
1761 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1762 return false;
1763
1764 // Must be a constant.
1765 auto MaybeImmVal =
1766 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1767 if (!MaybeImmVal)
1768 return false;
1769
1770 ShiftVal = MaybeImmVal->Value.getSExtValue();
1771 return true;
1772 };
1773
1774 // Logic ops are commutative, so check each operand for a match.
1775 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1776 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1777 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1778 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1779 uint64_t C0Val;
1780
1781 if (matchFirstShift(LogicMIOp1, C0Val)) {
1782 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1783 MatchInfo.Shift2 = LogicMIOp1;
1784 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1785 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1786 MatchInfo.Shift2 = LogicMIOp2;
1787 } else
1788 return false;
1789
1790 MatchInfo.ValSum = C0Val + C1Val;
1791
1792 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1793 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1794 return false;
1795
1796 MatchInfo.Logic = LogicMI;
1797 return true;
1798}
1799
1800void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1801 ShiftOfShiftedLogic &MatchInfo) {
1802 unsigned Opcode = MI.getOpcode();
1803 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1804 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1805 Opcode == TargetOpcode::G_SSHLSAT) &&
1806 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1807
1808 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1809 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1811
1812 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1813
1814 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1815 Register Shift1 =
1816 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1817
1818 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
1819 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the
1820 // old shift1 when building shift2. In that case, erasing MatchInfo.Shift2 at
1821 // the end would actually erase the old shift1 and cause a crash later, so
1822 // erase it earlier to avoid that.
1823 MatchInfo.Shift2->eraseFromParent();
1824
1825 Register Shift2Const = MI.getOperand(2).getReg();
1826 Register Shift2 = Builder
1827 .buildInstr(Opcode, {DestType},
1828 {MatchInfo.LogicNonShiftReg, Shift2Const})
1829 .getReg(0);
1830
1831 Register Dest = MI.getOperand(0).getReg();
1832 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1833
1834 // This was one use so it's safe to remove it.
1835 MatchInfo.Logic->eraseFromParent();
1836
1837 MI.eraseFromParent();
1838}
1839
1840bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
1841 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
1842 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
1843 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
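 // Illustrative example (hypothetical constants): with a 32-bit x,
 //   (shl (add x, 12), 3) --> (add (shl x, 3), 96), since 12 << 3 == 96.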
1844 auto &Shl = cast<GenericMachineInstr>(MI);
1845 Register DstReg = Shl.getReg(0);
1846 Register SrcReg = Shl.getReg(1);
1847 Register ShiftReg = Shl.getReg(2);
1848 Register X, C1;
1849
1850 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
1851 return false;
1852
1853 if (!mi_match(SrcReg, MRI,
1854 m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
1855 m_GOr(m_Reg(X), m_Reg(C1))))))
1856 return false;
1857
1858 APInt C1Val, C2Val;
1859 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
1860 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
1861 return false;
1862
1863 auto *SrcDef = MRI.getVRegDef(SrcReg);
1864 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
1865 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
1866 LLT SrcTy = MRI.getType(SrcReg);
1867 MatchInfo = [=](MachineIRBuilder &B) {
1868 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
1869 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
1870 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
1871 };
1872 return true;
1873}
1874
1875bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1876 unsigned &ShiftVal) {
1877 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1878 auto MaybeImmVal =
1879 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1880 if (!MaybeImmVal)
1881 return false;
1882
1883 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1884 return (static_cast<int32_t>(ShiftVal) != -1);
1885}
1886
1887void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1888 unsigned &ShiftVal) {
1889 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1890 MachineIRBuilder MIB(MI);
1891 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1892 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1894 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1895 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1897}
1898
1899// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
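// Illustrative example (hypothetical types and values, not from a real test):
// if %x:s8 is known to have at least two leading zero bits, then
//   %e:s32 = G_ZEXT %x
//   %s:s32 = G_SHL %e, 2
// can shift in the narrow type instead:
//   %n:s8  = G_SHL %x, 2
//   %s:s32 = G_ZEXT %n
// because no set bit is shifted out of the 8-bit value.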
1900bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1901 RegisterImmPair &MatchData) {
1902 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1903 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
1904 return false;
1905
1906 Register LHS = MI.getOperand(1).getReg();
1907
1908 Register ExtSrc;
1909 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1910 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1911 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1912 return false;
1913
1914 Register RHS = MI.getOperand(2).getReg();
1915 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
1916 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
1917 if (!MaybeShiftAmtVal)
1918 return false;
1919
1920 if (LI) {
1921 LLT SrcTy = MRI.getType(ExtSrc);
1922
1923 // We only really care about the legality with the shifted value. We can
1924 // pick any type for the constant shift amount, so ask the target what to
1925 // use. Otherwise we would have to guess and hope it is reported as legal.
1926 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1927 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1928 return false;
1929 }
1930
1931 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
1932 MatchData.Reg = ExtSrc;
1933 MatchData.Imm = ShiftAmt;
1934
1935 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
1936 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
1937 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
1938}
1939
1940void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1941 const RegisterImmPair &MatchData) {
1942 Register ExtSrcReg = MatchData.Reg;
1943 int64_t ShiftAmtVal = MatchData.Imm;
1944
1945 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1947 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1948 auto NarrowShift =
1949 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1950 Builder.buildZExt(MI.getOperand(0), NarrowShift);
1951 MI.eraseFromParent();
1952}
1953
1955 Register &MatchInfo) {
1956 GMerge &Merge = cast<GMerge>(MI);
1957 SmallVector<Register, 16> MergedValues;
1958 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
1959 MergedValues.emplace_back(Merge.getSourceReg(I));
1960
1961 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
1962 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
1963 return false;
1964
1965 for (unsigned I = 0; I < MergedValues.size(); ++I)
1966 if (MergedValues[I] != Unmerge->getReg(I))
1967 return false;
1968
1969 MatchInfo = Unmerge->getSourceReg();
1970 return true;
1971}
1972
1973static Register peekThroughBitcast(Register Reg,
1974 const MachineRegisterInfo &MRI) {
1975 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1976 ;
1977
1978 return Reg;
1979}
1980
1983 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1984 "Expected an unmerge");
1985 auto &Unmerge = cast<GUnmerge>(MI);
1986 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
1987
1988 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
1989 if (!SrcInstr)
1990 return false;
1991
1992 // Check the source type of the merge.
1993 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
1994 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
1995 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
1996 if (SrcMergeTy != Dst0Ty && !SameSize)
1997 return false;
1998 // They are the same now (modulo a bitcast).
1999 // We can collect all the src registers.
2000 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2001 Operands.push_back(SrcInstr->getSourceReg(Idx));
2002 return true;
2003}
2004
2007 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2008 "Expected an unmerge");
2009 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2010 "Not enough operands to replace all defs");
2011 unsigned NumElems = MI.getNumOperands() - 1;
2012
2013 LLT SrcTy = MRI.getType(Operands[0]);
2014 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2015 bool CanReuseInputDirectly = DstTy == SrcTy;
2017 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2018 Register DstReg = MI.getOperand(Idx).getReg();
2019 Register SrcReg = Operands[Idx];
2020
2021 // This combine may run after RegBankSelect, so we need to be aware of
2022 // register banks.
2023 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2024 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2025 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2026 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2027 }
2028
2029 if (CanReuseInputDirectly)
2030 replaceRegWith(MRI, DstReg, SrcReg);
2031 else
2032 Builder.buildCast(DstReg, SrcReg);
2033 }
2034 MI.eraseFromParent();
2035}
2036
2037bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2038 SmallVectorImpl<APInt> &Csts) {
2039 unsigned SrcIdx = MI.getNumOperands() - 1;
2040 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2041 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2042 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2043 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2044 return false;
2045 // Break down the big constant into smaller ones.
2046 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2047 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2048 ? CstVal.getCImm()->getValue()
2049 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2050
2051 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2052 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2053 // Unmerge a constant.
2054 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2055 Csts.emplace_back(Val.trunc(ShiftAmt));
2056 Val = Val.lshr(ShiftAmt);
2057 }
2058
2059 return true;
2060}
2061
2062void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2063 SmallVectorImpl<APInt> &Csts) {
2064 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2065 "Expected an unmerge");
2066 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2067 "Not enough operands to replace all defs");
2068 unsigned NumElems = MI.getNumOperands() - 1;
2070 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2071 Register DstReg = MI.getOperand(Idx).getReg();
2072 Builder.buildConstant(DstReg, Csts[Idx]);
2073 }
2074
2075 MI.eraseFromParent();
2076}
2077
2079 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
2080 unsigned SrcIdx = MI.getNumOperands() - 1;
2081 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2082 MatchInfo = [&MI](MachineIRBuilder &B) {
2083 unsigned NumElems = MI.getNumOperands() - 1;
2084 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2085 Register DstReg = MI.getOperand(Idx).getReg();
2086 B.buildUndef(DstReg);
2087 }
2088 };
2089 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2090}
2091
2093 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2094 "Expected an unmerge");
2095 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2096 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2097 return false;
2098 // Check that all the lanes are dead except the first one.
2099 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2100 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2101 return false;
2102 }
2103 return true;
2104}
2105
2108 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2109 Register Dst0Reg = MI.getOperand(0).getReg();
2110 Builder.buildTrunc(Dst0Reg, SrcReg);
2111 MI.eraseFromParent();
2112}
2113
2115 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2116 "Expected an unmerge");
2117 Register Dst0Reg = MI.getOperand(0).getReg();
2118 LLT Dst0Ty = MRI.getType(Dst0Reg);
2119 // G_ZEXT on vector applies to each lane, so it will
2120 // affect all destinations. Therefore we won't be able
2121 // to simplify the unmerge to just the first definition.
2122 if (Dst0Ty.isVector())
2123 return false;
2124 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2125 LLT SrcTy = MRI.getType(SrcReg);
2126 if (SrcTy.isVector())
2127 return false;
2128
2129 Register ZExtSrcReg;
2130 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2131 return false;
2132
2133 // Finally we can replace the first definition with
2134 // a zext of the source if the definition is big enough to hold
2135 // all of ZExtSrc's bits.
2136 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2137 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2138}
2139
2141 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2142 "Expected an unmerge");
2143
2144 Register Dst0Reg = MI.getOperand(0).getReg();
2145
2146 MachineInstr *ZExtInstr =
2147 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2148 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2149 "Expecting a G_ZEXT");
2150
2151 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2152 LLT Dst0Ty = MRI.getType(Dst0Reg);
2153 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2154
2156
2157 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2158 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2159 } else {
2160 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2161 "ZExt src doesn't fit in destination");
2162 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2163 }
2164
2165 Register ZeroReg;
2166 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2167 if (!ZeroReg)
2168 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2169 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2170 }
2171 MI.eraseFromParent();
2172}
2173
2174bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2175 unsigned TargetShiftSize,
2176 unsigned &ShiftVal) {
2177 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2178 MI.getOpcode() == TargetOpcode::G_LSHR ||
2179 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2180
2181 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2182 if (Ty.isVector()) // TODO: Handle vector types.
2183 return false;
2184
2185 // Don't narrow further than the requested size.
2186 unsigned Size = Ty.getSizeInBits();
2187 if (Size <= TargetShiftSize)
2188 return false;
2189
2190 auto MaybeImmVal =
2191 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2192 if (!MaybeImmVal)
2193 return false;
2194
2195 ShiftVal = MaybeImmVal->Value.getSExtValue();
2196 return ShiftVal >= Size / 2 && ShiftVal < Size;
2197}
2198
2199void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2200 const unsigned &ShiftVal) {
2201 Register DstReg = MI.getOperand(0).getReg();
2202 Register SrcReg = MI.getOperand(1).getReg();
2203 LLT Ty = MRI.getType(SrcReg);
2204 unsigned Size = Ty.getSizeInBits();
2205 unsigned HalfSize = Size / 2;
2206 assert(ShiftVal >= HalfSize);
2207
2208 LLT HalfTy = LLT::scalar(HalfSize);
2209
2211 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2212 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2213
2214 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2215 Register Narrowed = Unmerge.getReg(1);
2216
2217 // dst = G_LSHR s64:x, C for C >= 32
2218 // =>
2219 // lo, hi = G_UNMERGE_VALUES x
2220 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2221
2222 if (NarrowShiftAmt != 0) {
2223 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2224 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2225 }
2226
2227 auto Zero = Builder.buildConstant(HalfTy, 0);
2228 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2229 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2230 Register Narrowed = Unmerge.getReg(0);
2231 // dst = G_SHL s64:x, C for C >= 32
2232 // =>
2233 // lo, hi = G_UNMERGE_VALUES x
2234 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2235 if (NarrowShiftAmt != 0) {
2236 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2237 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2238 }
2239
2240 auto Zero = Builder.buildConstant(HalfTy, 0);
2241 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2242 } else {
2243 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2244 auto Hi = Builder.buildAShr(
2245 HalfTy, Unmerge.getReg(1),
2246 Builder.buildConstant(HalfTy, HalfSize - 1));
2247
2248 if (ShiftVal == HalfSize) {
2249 // (G_ASHR i64:x, 32) ->
2250 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2251 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2252 } else if (ShiftVal == Size - 1) {
2253 // Don't need a second shift.
2254 // (G_ASHR i64:x, 63) ->
2255 // %narrowed = (G_ASHR hi_32(x), 31)
2256 // G_MERGE_VALUES %narrowed, %narrowed
2257 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2258 } else {
2259 auto Lo = Builder.buildAShr(
2260 HalfTy, Unmerge.getReg(1),
2261 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2262
2263 // (G_ASHR i64:x, C) ->, for C >= 32
2264 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2265 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2266 }
2267 }
2268
2269 MI.eraseFromParent();
2270}
2271
2273 unsigned TargetShiftAmount) {
2274 unsigned ShiftAmt;
2275 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2276 applyCombineShiftToUnmerge(MI, ShiftAmt);
2277 return true;
2278 }
2279
2280 return false;
2281}
2282
2284 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2285 Register DstReg = MI.getOperand(0).getReg();
2286 LLT DstTy = MRI.getType(DstReg);
2287 Register SrcReg = MI.getOperand(1).getReg();
2288 return mi_match(SrcReg, MRI,
2289 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2290}
2291
2293 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2294 Register DstReg = MI.getOperand(0).getReg();
2296 Builder.buildCopy(DstReg, Reg);
2297 MI.eraseFromParent();
2298}
2299
2301 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2302 Register DstReg = MI.getOperand(0).getReg();
2304 Builder.buildZExtOrTrunc(DstReg, Reg);
2305 MI.eraseFromParent();
2306}
2307
2308bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2309 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2310 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2311 Register LHS = MI.getOperand(1).getReg();
2312 Register RHS = MI.getOperand(2).getReg();
2313 LLT IntTy = MRI.getType(LHS);
2314
2315 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2316 // instruction.
2317 PtrReg.second = false;
2318 for (Register SrcReg : {LHS, RHS}) {
2319 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2320 // Don't handle cases where the integer is implicitly converted to the
2321 // pointer width.
2322 LLT PtrTy = MRI.getType(PtrReg.first);
2323 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2324 return true;
2325 }
2326
2327 PtrReg.second = true;
2328 }
2329
2330 return false;
2331}
2332
2333void CombinerHelper::applyCombineAddP2IToPtrAdd(
2334 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2335 Register Dst = MI.getOperand(0).getReg();
2336 Register LHS = MI.getOperand(1).getReg();
2337 Register RHS = MI.getOperand(2).getReg();
2338
2339 const bool DoCommute = PtrReg.second;
2340 if (DoCommute)
2341 std::swap(LHS, RHS);
2342 LHS = PtrReg.first;
2343
2344 LLT PtrTy = MRI.getType(LHS);
2345
2347 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2348 Builder.buildPtrToInt(Dst, PtrAdd);
2349 MI.eraseFromParent();
2350}
2351
2352bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2353 APInt &NewCst) {
2354 auto &PtrAdd = cast<GPtrAdd>(MI);
2355 Register LHS = PtrAdd.getBaseReg();
2356 Register RHS = PtrAdd.getOffsetReg();
2358
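 // Illustrative example (hypothetical constants, not from a real test): folding
 //   %p:p0 = G_INTTOPTR 4096
 //   %q:p0 = G_PTR_ADD %p, 16
 // computes NewCst == 4112, so the whole address becomes a compile-time
 // constant.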
2359 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2360 APInt Cst;
2361 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2362 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2363 // G_INTTOPTR uses zero-extension
2364 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2365 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2366 return true;
2367 }
2368 }
2369
2370 return false;
2371}
2372
2373void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2374 APInt &NewCst) {
2375 auto &PtrAdd = cast<GPtrAdd>(MI);
2376 Register Dst = PtrAdd.getReg(0);
2377
2379 Builder.buildConstant(Dst, NewCst);
2380 PtrAdd.eraseFromParent();
2381}
2382
2384 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2385 Register DstReg = MI.getOperand(0).getReg();
2386 Register SrcReg = MI.getOperand(1).getReg();
2387 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2388 if (OriginalSrcReg.isValid())
2389 SrcReg = OriginalSrcReg;
2390 LLT DstTy = MRI.getType(DstReg);
2391 return mi_match(SrcReg, MRI,
2392 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2393}
2394
2396 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2397 Register DstReg = MI.getOperand(0).getReg();
2398 Register SrcReg = MI.getOperand(1).getReg();
2399 LLT DstTy = MRI.getType(DstReg);
2400 if (mi_match(SrcReg, MRI,
2401 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2402 unsigned DstSize = DstTy.getScalarSizeInBits();
2403 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2404 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2405 }
2406 return false;
2407}
2408
2409bool CombinerHelper::matchCombineExtOfExt(
2410 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2411 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2412 MI.getOpcode() == TargetOpcode::G_SEXT ||
2413 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2414 "Expected a G_[ASZ]EXT");
2415 Register SrcReg = MI.getOperand(1).getReg();
2416 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2417 if (OriginalSrcReg.isValid())
2418 SrcReg = OriginalSrcReg;
2419 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2420 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2421 unsigned Opc = MI.getOpcode();
2422 unsigned SrcOpc = SrcMI->getOpcode();
2423 if (Opc == SrcOpc ||
2424 (Opc == TargetOpcode::G_ANYEXT &&
2425 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2426 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2427 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2428 return true;
2429 }
2430 return false;
2431}
2432
2433void CombinerHelper::applyCombineExtOfExt(
2434 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2435 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2436 MI.getOpcode() == TargetOpcode::G_SEXT ||
2437 MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2438 "Expected a G_[ASZ]EXT");
2439
2440 Register Reg = std::get<0>(MatchInfo);
2441 unsigned SrcExtOp = std::get<1>(MatchInfo);
2442
2443 // Combine exts with the same opcode.
2444 if (MI.getOpcode() == SrcExtOp) {
2446 MI.getOperand(1).setReg(Reg);
2448 return;
2449 }
2450
2451 // Combine:
2452 // - anyext([sz]ext x) to [sz]ext x
2453 // - sext(zext x) to zext x
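 // Illustrative example (hypothetical types, not from a real test):
 //   %a:s16 = G_ZEXT %x:s8
 //   %b:s32 = G_SEXT %a
 // is equivalent to %b:s32 = G_ZEXT %x, because the inner zext already
 // guarantees a zero sign bit.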
2454 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2455 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2456 SrcExtOp == TargetOpcode::G_ZEXT)) {
2457 Register DstReg = MI.getOperand(0).getReg();
2459 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2460 MI.eraseFromParent();
2461 }
2462}
2463
2464bool CombinerHelper::matchCombineTruncOfExt(
2465 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2466 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2467 Register SrcReg = MI.getOperand(1).getReg();
2468 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2469 unsigned SrcOpc = SrcMI->getOpcode();
2470 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2471 SrcOpc == TargetOpcode::G_ZEXT) {
2472 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2473 return true;
2474 }
2475 return false;
2476}
2477
2478void CombinerHelper::applyCombineTruncOfExt(
2479 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2480 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2481 Register SrcReg = MatchInfo.first;
2482 unsigned SrcExtOp = MatchInfo.second;
2483 Register DstReg = MI.getOperand(0).getReg();
2484 LLT SrcTy = MRI.getType(SrcReg);
2485 LLT DstTy = MRI.getType(DstReg);
2486 if (SrcTy == DstTy) {
2487 MI.eraseFromParent();
2488 replaceRegWith(MRI, DstReg, SrcReg);
2489 return;
2490 }
2492 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2493 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2494 else
2495 Builder.buildTrunc(DstReg, SrcReg);
2496 MI.eraseFromParent();
2497}
2498
2500 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2501 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2502
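 // Illustrative example (hypothetical types): truncating an s64 right-shift
 // result to s16 picks s32 as the intermediate type, i.e. the shift is
 // narrowed to s32 and only then truncated to s16.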
2503 // ShiftTy > 32 > TruncTy -> 32
2504 if (ShiftSize > 32 && TruncSize < 32)
2505 return ShiftTy.changeElementSize(32);
2506
2507 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2508 // Some targets like it, some don't, some only like it under certain
2509 // conditions/processor versions, etc.
2510 // A TL hook might be needed for this.
2511
2512 // Don't combine
2513 return ShiftTy;
2514}
2515
2516bool CombinerHelper::matchCombineTruncOfShift(
2517 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2518 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2519 Register DstReg = MI.getOperand(0).getReg();
2520 Register SrcReg = MI.getOperand(1).getReg();
2521
2522 if (!MRI.hasOneNonDBGUse(SrcReg))
2523 return false;
2524
2525 LLT SrcTy = MRI.getType(SrcReg);
2526 LLT DstTy = MRI.getType(DstReg);
2527
2528 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2529 const auto &TL = getTargetLowering();
2530
2531 LLT NewShiftTy;
2532 switch (SrcMI->getOpcode()) {
2533 default:
2534 return false;
2535 case TargetOpcode::G_SHL: {
2536 NewShiftTy = DstTy;
2537
2538 // Make sure new shift amount is legal.
2539 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2540 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2541 return false;
2542 break;
2543 }
2544 case TargetOpcode::G_LSHR:
2545 case TargetOpcode::G_ASHR: {
2546 // For right shifts, we conservatively do not do the transform if the TRUNC
2547 // has any STORE users. The reason is that if we change the type of the
2548 // shift, we may break the truncstore combine.
2549 //
2550 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2551 for (auto &User : MRI.use_instructions(DstReg))
2552 if (User.getOpcode() == TargetOpcode::G_STORE)
2553 return false;
2554
2555 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2556 if (NewShiftTy == SrcTy)
2557 return false;
2558
2559 // Make sure we won't lose information by truncating the high bits.
2560 KnownBits Known = KB->getKnownBits(SrcMI->getOperand(2).getReg());
2561 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2562 DstTy.getScalarSizeInBits()))
2563 return false;
2564 break;
2565 }
2566 }
2567
2568 if (!isLegalOrBeforeLegalizer(
2569 {SrcMI->getOpcode(),
2570 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2571 return false;
2572
2573 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2574 return true;
2575}
2576
2577void CombinerHelper::applyCombineTruncOfShift(
2578 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) {
2580
2581 MachineInstr *ShiftMI = MatchInfo.first;
2582 LLT NewShiftTy = MatchInfo.second;
2583
2584 Register Dst = MI.getOperand(0).getReg();
2585 LLT DstTy = MRI.getType(Dst);
2586
2587 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2588 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2589 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2590
2591 Register NewShift =
2592 Builder
2593 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2594 .getReg(0);
2595
2596 if (NewShiftTy == DstTy)
2597 replaceRegWith(MRI, Dst, NewShift);
2598 else
2599 Builder.buildTrunc(Dst, NewShift);
2600
2601 eraseInst(MI);
2602}
2603
2605 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2606 return MO.isReg() &&
2607 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2608 });
2609}
2610
2612 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2613 return !MO.isReg() ||
2614 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2615 });
2616}
2617
2619 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2620 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2621 return all_of(Mask, [](int Elt) { return Elt < 0; });
2622}
2623
2625 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2626 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2627 MRI);
2628}
2629
2631 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2632 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2633 MRI);
2634}
2635
2637 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2638 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2639 "Expected an insert/extract element op");
2640 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2641 unsigned IdxIdx =
2642 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2643 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2644 if (!Idx)
2645 return false;
2646 return Idx->getZExtValue() >= VecTy.getNumElements();
2647}
2648
2650 GSelect &SelMI = cast<GSelect>(MI);
2651 auto Cst =
2652 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2653 if (!Cst)
2654 return false;
2655 OpIdx = Cst->isZero() ? 3 : 2;
2656 return true;
2657}
2658
2659void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
2660
2661bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2662 const MachineOperand &MOP2) {
2663 if (!MOP1.isReg() || !MOP2.isReg())
2664 return false;
2665 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2666 if (!InstAndDef1)
2667 return false;
2668 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2669 if (!InstAndDef2)
2670 return false;
2671 MachineInstr *I1 = InstAndDef1->MI;
2672 MachineInstr *I2 = InstAndDef2->MI;
2673
2674 // Handle a case like this:
2675 //
2676 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2677 //
2678 // Even though %0 and %1 are produced by the same instruction they are not
2679 // the same values.
2680 if (I1 == I2)
2681 return MOP1.getReg() == MOP2.getReg();
2682
2683 // If we have an instruction which loads or stores, we can't guarantee that
2684 // it is identical.
2685 //
2686 // For example, we may have
2687 //
2688 // %x1 = G_LOAD %addr (load N from @somewhere)
2689 // ...
2690 // call @foo
2691 // ...
2692 // %x2 = G_LOAD %addr (load N from @somewhere)
2693 // ...
2694 // %or = G_OR %x1, %x2
2695 //
2696 // It's possible that @foo will modify whatever lives at the address we're
2697 // loading from. To be safe, let's just assume that all loads and stores
2698 // are different (unless we have something which is guaranteed to not
2699 // change.)
2700 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2701 return false;
2702
2703 // If both instructions are loads or stores, they are equal only if both
2704 // are dereferenceable invariant loads with the same number of bits.
2705 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
2706 GLoadStore *LS1 = dyn_cast<GLoadStore>(I1);
2707 GLoadStore *LS2 = dyn_cast<GLoadStore>(I2);
2708 if (!LS1 || !LS2)
2709 return false;
2710
2711 if (!I2->isDereferenceableInvariantLoad() ||
2712 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2713 return false;
2714 }
2715
2716 // Check for physical registers on the instructions first to avoid cases
2717 // like this:
2718 //
2719 // %a = COPY $physreg
2720 // ...
2721 // SOMETHING implicit-def $physreg
2722 // ...
2723 // %b = COPY $physreg
2724 //
2725 // These copies are not equivalent.
2726 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2727 return MO.isReg() && MO.getReg().isPhysical();
2728 })) {
2729 // Check if we have a case like this:
2730 //
2731 // %a = COPY $physreg
2732 // %b = COPY %a
2733 //
2734 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2735 // From that, we know that they must have the same value, since they must
2736 // have come from the same COPY.
2737 return I1->isIdenticalTo(*I2);
2738 }
2739
2740 // We don't have any physical registers, so we don't necessarily need the
2741 // same vreg defs.
2742 //
2743 // On the off-chance that there's some target instruction feeding into the
2744 // instruction, let's use produceSameValue instead of isIdenticalTo.
2745 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2746 // Handle instructions with multiple defs that produce same values. Values
2747 // are same for operands with same index.
2748 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2749 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2750 // I1 and I2 are different instructions but produce same values,
2751 // %1 and %6 are same, %1 and %7 are not the same value.
2752 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2753 I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2754 }
2755 return false;
2756}
2757
2759 if (!MOP.isReg())
2760 return false;
2761 auto *MI = MRI.getVRegDef(MOP.getReg());
2762 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
2763 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2764 MaybeCst->getSExtValue() == C;
2765}
2766
2768 if (!MOP.isReg())
2769 return false;
2770 std::optional<FPValueAndVReg> MaybeCst;
2771 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2772 return false;
2773
2774 return MaybeCst->Value.isExactlyValue(C);
2775}
2776
2778 unsigned OpIdx) {
2779 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2780 Register OldReg = MI.getOperand(0).getReg();
2781 Register Replacement = MI.getOperand(OpIdx).getReg();
2782 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2783 MI.eraseFromParent();
2784 replaceRegWith(MRI, OldReg, Replacement);
2785}
2786
2788 Register Replacement) {
2789 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2790 Register OldReg = MI.getOperand(0).getReg();
2791 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2792 MI.eraseFromParent();
2793 replaceRegWith(MRI, OldReg, Replacement);
2794}
2795
2797 unsigned ConstIdx) {
2798 Register ConstReg = MI.getOperand(ConstIdx).getReg();
2799 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2800
2801 // Get the shift amount
2802 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2803 if (!VRegAndVal)
2804 return false;
2805
2806 // Return true if the shift amount is >= the bitwidth.
2807 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
2808}
2809
2811 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
2812 MI.getOpcode() == TargetOpcode::G_FSHR) &&
2813 "This is not a funnel shift operation");
2814
2815 Register ConstReg = MI.getOperand(3).getReg();
2816 LLT ConstTy = MRI.getType(ConstReg);
2817 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2818
2819 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
2820 assert((VRegAndVal) && "Value is not a constant");
2821
2822 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
2823 APInt NewConst = VRegAndVal->Value.urem(
2824 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
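 // Illustrative example (hypothetical constants): a G_FSHR of s32 values with
 // a shift amount of 37 is rebuilt with 37 % 32 == 5, which yields the same
 // result.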
2825
2827 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
2829 MI.getOpcode(), {MI.getOperand(0)},
2830 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
2831
2832 MI.eraseFromParent();
2833}
2834
2836 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2837 // Match (cond ? x : x)
2838 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2839 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2840 MRI);
2841}
2842
2844 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2845 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2846 MRI);
2847}
2848
2850 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2851 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2852 MRI);
2853}
2854
2856 MachineOperand &MO = MI.getOperand(OpIdx);
2857 return MO.isReg() &&
2858 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2859}
2860
2862 unsigned OpIdx) {
2863 MachineOperand &MO = MI.getOperand(OpIdx);
2864 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2865}
2866
2868 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2870 Builder.buildFConstant(MI.getOperand(0), C);
2871 MI.eraseFromParent();
2872}
2873
2875 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2877 Builder.buildConstant(MI.getOperand(0), C);
2878 MI.eraseFromParent();
2879}
2880
2882 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2884 Builder.buildConstant(MI.getOperand(0), C);
2885 MI.eraseFromParent();
2886}
2887
2889 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2891 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
2892 MI.eraseFromParent();
2893}
2894
2896 assert(MI.getNumDefs() == 1 && "Expected only one def?");
2898 Builder.buildUndef(MI.getOperand(0));
2899 MI.eraseFromParent();
2900}
2901
2903 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2904 Register LHS = MI.getOperand(1).getReg();
2905 Register RHS = MI.getOperand(2).getReg();
2906 Register &NewLHS = std::get<0>(MatchInfo);
2907 Register &NewRHS = std::get<1>(MatchInfo);
2908
2909 // Helper lambda to check for opportunities for
2910 // ((0-A) + B) -> B - A
2911 // (A + (0-B)) -> A - B
2912 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2913 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2914 return false;
2915 NewLHS = MaybeNewLHS;
2916 return true;
2917 };
2918
2919 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2920}
2921
2924 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2925 "Invalid opcode");
2926 Register DstReg = MI.getOperand(0).getReg();
2927 LLT DstTy = MRI.getType(DstReg);
2928 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2929 unsigned NumElts = DstTy.getNumElements();
2930 // If this MI is part of a sequence of insert_vec_elts, then
2931 // don't do the combine in the middle of the sequence.
2932 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2933 TargetOpcode::G_INSERT_VECTOR_ELT)
2934 return false;
2935 MachineInstr *CurrInst = &MI;
2936 MachineInstr *TmpInst;
2937 int64_t IntImm;
2938 Register TmpReg;
2939 MatchInfo.resize(NumElts);
2940 while (mi_match(
2941 CurrInst->getOperand(0).getReg(), MRI,
2942 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2943 if (IntImm >= NumElts || IntImm < 0)
2944 return false;
2945 if (!MatchInfo[IntImm])
2946 MatchInfo[IntImm] = TmpReg;
2947 CurrInst = TmpInst;
2948 }
2949 // Variable index.
2950 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2951 return false;
2952 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2953 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2954 if (!MatchInfo[I - 1].isValid())
2955 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2956 }
2957 return true;
2958 }
2959 // If we didn't end in a G_IMPLICIT_DEF, bail out.
2960 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2961}
2962
2966 Register UndefReg;
2967 auto GetUndef = [&]() {
2968 if (UndefReg)
2969 return UndefReg;
2970 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2971 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2972 return UndefReg;
2973 };
2974 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2975 if (!MatchInfo[I])
2976 MatchInfo[I] = GetUndef();
2977 }
2978 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2979 MI.eraseFromParent();
2980}
2981
2983 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2985 Register SubLHS, SubRHS;
2986 std::tie(SubLHS, SubRHS) = MatchInfo;
2987 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2988 MI.eraseFromParent();
2989}
2990
2993 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2994 //
2995 // Creates the new hand + logic instruction (but does not insert them.)
2996 //
2997 // On success, MatchInfo is populated with the new instructions. These are
2998 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2999 unsigned LogicOpcode = MI.getOpcode();
3000 assert(LogicOpcode == TargetOpcode::G_AND ||
3001 LogicOpcode == TargetOpcode::G_OR ||
3002 LogicOpcode == TargetOpcode::G_XOR);
3003 MachineIRBuilder MIB(MI);
3004 Register Dst = MI.getOperand(0).getReg();
3005 Register LHSReg = MI.getOperand(1).getReg();
3006 Register RHSReg = MI.getOperand(2).getReg();
3007
3008 // Don't recompute anything.
3009 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3010 return false;
3011
3012 // Make sure we have (hand x, ...), (hand y, ...)
3013 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3014 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3015 if (!LeftHandInst || !RightHandInst)
3016 return false;
3017 unsigned HandOpcode = LeftHandInst->getOpcode();
3018 if (HandOpcode != RightHandInst->getOpcode())
3019 return false;
3020 if (!LeftHandInst->getOperand(1).isReg() ||
3021 !RightHandInst->getOperand(1).isReg())
3022 return false;
3023
3024 // Make sure the types match up, and if we're doing this post-legalization,
3025 // we end up with legal types.
3026 Register X = LeftHandInst->getOperand(1).getReg();
3027 Register Y = RightHandInst->getOperand(1).getReg();
3028 LLT XTy = MRI.getType(X);
3029 LLT YTy = MRI.getType(Y);
3030 if (!XTy.isValid() || XTy != YTy)
3031 return false;
3032
3033 // Optional extra source register.
3034 Register ExtraHandOpSrcReg;
3035 switch (HandOpcode) {
3036 default:
3037 return false;
3038 case TargetOpcode::G_ANYEXT:
3039 case TargetOpcode::G_SEXT:
3040 case TargetOpcode::G_ZEXT: {
3041 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3042 break;
3043 }
3044 case TargetOpcode::G_AND:
3045 case TargetOpcode::G_ASHR:
3046 case TargetOpcode::G_LSHR:
3047 case TargetOpcode::G_SHL: {
3048 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3049 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3050 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3051 return false;
3052 ExtraHandOpSrcReg = ZOp.getReg();
3053 break;
3054 }
3055 }
3056
3057 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3058 return false;
3059
3060 // Record the steps to build the new instructions.
3061 //
3062 // Steps to build (logic x, y)
3063 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3064 OperandBuildSteps LogicBuildSteps = {
3065 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3066 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3067 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3068 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3069
3070 // Steps to build hand (logic x, y), ...z
3071 OperandBuildSteps HandBuildSteps = {
3072 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3073 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3074 if (ExtraHandOpSrcReg.isValid())
3075 HandBuildSteps.push_back(
3076 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3077 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3078
3079 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3080 return true;
3081}
3082
3085 assert(MatchInfo.InstrsToBuild.size() &&
3086 "Expected at least one instr to build?");
3088 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3089 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3090 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3091 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3092 for (auto &OperandFn : InstrToBuild.OperandFns)
3093 OperandFn(Instr);
3094 }
3095 MI.eraseFromParent();
3096}
3097
3099 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3100 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3101 int64_t ShlCst, AshrCst;
3102 Register Src;
3103 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3104 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3105 m_ICstOrSplat(AshrCst))))
3106 return false;
3107 if (ShlCst != AshrCst)
3108 return false;
3110 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3111 return false;
3112 MatchInfo = std::make_tuple(Src, ShlCst);
3113 return true;
3114}
3115
3117 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3118 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3119 Register Src;
3120 int64_t ShiftAmt;
3121 std::tie(Src, ShiftAmt) = MatchInfo;
3122 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3124 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3125 MI.eraseFromParent();
3126}
3127
3128/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
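/// Illustrative example (hypothetical constants):
///   and(and(x, 0xFF), 0x0F) -> and(x, 0x0F) since 0xFF & 0x0F == 0x0F, and
///   and(and(x, 0xF0), 0x0F) -> 0            since 0xF0 & 0x0F == 0.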
3129bool CombinerHelper::matchOverlappingAnd(
3130 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3131 assert(MI.getOpcode() == TargetOpcode::G_AND);
3132
3133 Register Dst = MI.getOperand(0).getReg();
3134 LLT Ty = MRI.getType(Dst);
3135
3136 Register R;
3137 int64_t C1;
3138 int64_t C2;
3139 if (!mi_match(
3140 Dst, MRI,
3141 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3142 return false;
3143
3144 MatchInfo = [=](MachineIRBuilder &B) {
3145 if (C1 & C2) {
3146 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3147 return;
3148 }
3149 auto Zero = B.buildConstant(Ty, 0);
3150 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3151 };
3152 return true;
3153}
3154
3155bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3156 Register &Replacement) {
3157 // Given
3158 //
3159 // %y:_(sN) = G_SOMETHING
3160 // %x:_(sN) = G_SOMETHING
3161 // %res:_(sN) = G_AND %x, %y
3162 //
3163 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3164 //
3165 // Patterns like this can appear as a result of legalization. E.g.
3166 //
3167 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3168 // %one:_(s32) = G_CONSTANT i32 1
3169 // %and:_(s32) = G_AND %cmp, %one
3170 //
3171 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3172 assert(MI.getOpcode() == TargetOpcode::G_AND);
3173 if (!KB)
3174 return false;
3175
3176 Register AndDst = MI.getOperand(0).getReg();
3177 Register LHS = MI.getOperand(1).getReg();
3178 Register RHS = MI.getOperand(2).getReg();
3179 KnownBits LHSBits = KB->getKnownBits(LHS);
3180 KnownBits RHSBits = KB->getKnownBits(RHS);
3181
3182 // Check that x & Mask == x.
3183 // x & 1 == x, always
3184 // x & 0 == x, only if x is also 0
3185 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
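 // Illustrative example (hypothetical known bits): if x is known to be
 // 0b0000?0?1 and Mask is 0b00001111, every bit is either one in Mask or known
 // zero in x, so x & Mask == x and the G_AND can be removed.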
3186 //
3187 // Check if we can replace AndDst with the LHS of the G_AND
3188 if (canReplaceReg(AndDst, LHS, MRI) &&
3189 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3190 Replacement = LHS;
3191 return true;
3192 }
3193
3194 // Check if we can replace AndDst with the RHS of the G_AND
3195 if (canReplaceReg(AndDst, RHS, MRI) &&
3196 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3197 Replacement = RHS;
3198 return true;
3199 }
3200
3201 return false;
3202}
3203
3205 // Given
3206 //
3207 // %y:_(sN) = G_SOMETHING
3208 // %x:_(sN) = G_SOMETHING
3209 // %res:_(sN) = G_OR %x, %y
3210 //
3211 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3212 assert(MI.getOpcode() == TargetOpcode::G_OR);
3213 if (!KB)
3214 return false;
3215
3216 Register OrDst = MI.getOperand(0).getReg();
3217 Register LHS = MI.getOperand(1).getReg();
3218 Register RHS = MI.getOperand(2).getReg();
3219 KnownBits LHSBits = KB->getKnownBits(LHS);
3220 KnownBits RHSBits = KB->getKnownBits(RHS);
3221
3222 // Check that x | Mask == x.
3223 // x | 0 == x, always
3224 // x | 1 == x, only if x is also 1
3225 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3226 //
3227 // Check if we can replace OrDst with the LHS of the G_OR
3228 if (canReplaceReg(OrDst, LHS, MRI) &&
3229 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3230 Replacement = LHS;
3231 return true;
3232 }
3233
3234 // Check if we can replace OrDst with the RHS of the G_OR
3235 if (canReplaceReg(OrDst, RHS, MRI) &&
3236 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3237 Replacement = RHS;
3238 return true;
3239 }
3240
3241 return false;
3242}
3243
3245 // If the input is already sign extended, just drop the extension.
3246 Register Src = MI.getOperand(1).getReg();
3247 unsigned ExtBits = MI.getOperand(2).getImm();
3248 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3249 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3250}
3251
3252static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3253 int64_t Cst, bool IsVector, bool IsFP) {
3254 // For i1, Cst will always be -1 regardless of boolean contents.
3255 return (ScalarSizeBits == 1 && Cst == -1) ||
3256 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3257}
3258
3259bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3260 SmallVectorImpl<Register> &RegsToNegate) {
3261 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3262 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3263 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3264 Register XorSrc;
3265 Register CstReg;
3266 // We match xor(src, true) here.
3267 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3268 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3269 return false;
3270
3271 if (!MRI.hasOneNonDBGUse(XorSrc))
3272 return false;
3273
3274 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3275 // and ORs. The suffix of RegsToNegate starting from index I is used as a
3276 // work list of tree nodes to visit.
3277 RegsToNegate.push_back(XorSrc);
3278 // Remember whether the comparisons are all integer or all floating point.
3279 bool IsInt = false;
3280 bool IsFP = false;
3281 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3282 Register Reg = RegsToNegate[I];
3283 if (!MRI.hasOneNonDBGUse(Reg))
3284 return false;
3285 MachineInstr *Def = MRI.getVRegDef(Reg);
3286 switch (Def->getOpcode()) {
3287 default:
3288 // Don't match if the tree contains anything other than ANDs, ORs and
3289 // comparisons.
3290 return false;
3291 case TargetOpcode::G_ICMP:
3292 if (IsFP)
3293 return false;
3294 IsInt = true;
3295 // When we apply the combine we will invert the predicate.
3296 break;
3297 case TargetOpcode::G_FCMP:
3298 if (IsInt)
3299 return false;
3300 IsFP = true;
3301 // When we apply the combine we will invert the predicate.
3302 break;
3303 case TargetOpcode::G_AND:
3304 case TargetOpcode::G_OR:
3305 // Implement De Morgan's laws:
3306 // ~(x & y) -> ~x | ~y
3307 // ~(x | y) -> ~x & ~y
3308 // When we apply the combine we will change the opcode and recursively
3309 // negate the operands.
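 // Illustrative example: ~((icmp eq a, b) & (icmp eq c, d)) becomes
 // (icmp ne a, b) | (icmp ne c, d).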
3310 RegsToNegate.push_back(Def->getOperand(1).getReg());
3311 RegsToNegate.push_back(Def->getOperand(2).getReg());
3312 break;
3313 }
3314 }
3315
3316 // Now we know whether the comparisons are integer or floating point, check
3317 // the constant in the xor.
3318 int64_t Cst;
3319 if (Ty.isVector()) {
3320 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3321 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3322 if (!MaybeCst)
3323 return false;
3324 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3325 return false;
3326 } else {
3327 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3328 return false;
3329 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3330 return false;
3331 }
3332
3333 return true;
3334}
3335
3337 SmallVectorImpl<Register> &RegsToNegate) {
3338 for (Register Reg : RegsToNegate) {
3339 MachineInstr *Def = MRI.getVRegDef(Reg);
3340 Observer.changingInstr(*Def);
3341 // For each comparison, invert the predicate. For each AND and OR, swap the
3342 // opcode, per De Morgan's laws.
3343 switch (Def->getOpcode()) {
3344 default:
3345 llvm_unreachable("Unexpected opcode");
3346 case TargetOpcode::G_ICMP:
3347 case TargetOpcode::G_FCMP: {
3348 MachineOperand &PredOp = Def->getOperand(1);
3349 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3350 (CmpInst::Predicate)PredOp.getPredicate());
3351 PredOp.setPredicate(NewP);
3352 break;
3353 }
3354 case TargetOpcode::G_AND:
3355 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3356 break;
3357 case TargetOpcode::G_OR:
3358 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3359 break;
3360 }
3361 Observer.changedInstr(*Def);
3362 }
3363
3364 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3365 MI.eraseFromParent();
3366}
3367
3369 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3370 // Match (xor (and x, y), y) (or any of its commuted cases)
3371 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3372 Register &X = MatchInfo.first;
3373 Register &Y = MatchInfo.second;
3374 Register AndReg = MI.getOperand(1).getReg();
3375 Register SharedReg = MI.getOperand(2).getReg();
3376
3377 // Find a G_AND on either side of the G_XOR.
3378 // Look for one of
3379 //
3380 // (xor (and x, y), SharedReg)
3381 // (xor SharedReg, (and x, y))
3382 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3383 std::swap(AndReg, SharedReg);
3384 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3385 return false;
3386 }
3387
3388 // Only do this if we'll eliminate the G_AND.
3389 if (!MRI.hasOneNonDBGUse(AndReg))
3390 return false;
3391
3392 // We can combine if SharedReg is the same as either the LHS or RHS of the
3393 // G_AND.
3394 if (Y != SharedReg)
3395 std::swap(X, Y);
3396 return Y == SharedReg;
3397}
3398
3400 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3401 // Fold (xor (and x, y), y) -> (and (not x), y)
3403 Register X, Y;
3404 std::tie(X, Y) = MatchInfo;
3405 auto Not = Builder.buildNot(MRI.getType(X), X);
3407 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3408 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3409 MI.getOperand(2).setReg(Y);
3411}
3412
3414 auto &PtrAdd = cast<GPtrAdd>(MI);
3415 Register DstReg = PtrAdd.getReg(0);
3416 LLT Ty = MRI.getType(DstReg);
3418
3419 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3420 return false;
3421
3422 if (Ty.isPointer()) {
3423 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3424 return ConstVal && *ConstVal == 0;
3425 }
3426
3427 assert(Ty.isVector() && "Expecting a vector type");
3428 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3429 return isBuildVectorAllZeros(*VecMI, MRI);
3430}
3431
3433 auto &PtrAdd = cast<GPtrAdd>(MI);
3435 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3436 PtrAdd.eraseFromParent();
3437}
3438
3439/// The second source operand is known to be a power of 2.
3441 Register DstReg = MI.getOperand(0).getReg();
3442 Register Src0 = MI.getOperand(1).getReg();
3443 Register Pow2Src1 = MI.getOperand(2).getReg();
3444 LLT Ty = MRI.getType(DstReg);
3446
3447 // Fold (urem x, pow2) -> (and x, pow2-1)
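 // Illustrative example (hypothetical constants): x urem 8 == x & 7; e.g.
 // 29 urem 8 and 29 & 7 are both 5.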
3448 auto NegOne = Builder.buildConstant(Ty, -1);
3449 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3450 Builder.buildAnd(DstReg, Src0, Add);
3451 MI.eraseFromParent();
3452}
3453
3454bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
3455 unsigned &SelectOpNo) {
3456 Register LHS = MI.getOperand(1).getReg();
3457 Register RHS = MI.getOperand(2).getReg();
3458
3459 Register OtherOperandReg = RHS;
3460 SelectOpNo = 1;
3461 MachineInstr *Select = MRI.getVRegDef(LHS);
3462
3463 // Don't do this unless the old select is going away. We want to eliminate the
3464 // binary operator, not replace a binop with a select.
3465 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3466 !MRI.hasOneNonDBGUse(LHS)) {
3467 OtherOperandReg = LHS;
3468 SelectOpNo = 2;
3469 Select = MRI.getVRegDef(RHS);
3470 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3471 !MRI.hasOneNonDBGUse(RHS))
3472 return false;
3473 }
3474
3475 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3476 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3477
3478 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3479 /*AllowFP*/ true,
3480 /*AllowOpaqueConstants*/ false))
3481 return false;
3482 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3483 /*AllowFP*/ true,
3484 /*AllowOpaqueConstants*/ false))
3485 return false;
3486
3487 unsigned BinOpcode = MI.getOpcode();
3488
3489 // We know that one of the operands is a select of constants. Now verify that
3490 // the other binary operator operand is either a constant, or we can handle a
3491 // variable.
3492 bool CanFoldNonConst =
3493 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3494 (isNullOrNullSplat(*SelectLHS, MRI) ||
3495 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3496 (isNullOrNullSplat(*SelectRHS, MRI) ||
3497 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3498 if (CanFoldNonConst)
3499 return true;
3500
3501 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3502 /*AllowFP*/ true,
3503 /*AllowOpaqueConstants*/ false);
3504}
3505
3506/// \p SelectOperand is the operand in binary operator \p MI that is the select
3507/// to fold.
3509 const unsigned &SelectOperand) {
3511
3512 Register Dst = MI.getOperand(0).getReg();
3513 Register LHS = MI.getOperand(1).getReg();
3514 Register RHS = MI.getOperand(2).getReg();
3515 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3516
3517 Register SelectCond = Select->getOperand(1).getReg();
3518 Register SelectTrue = Select->getOperand(2).getReg();
3519 Register SelectFalse = Select->getOperand(3).getReg();
3520
3521 LLT Ty = MRI.getType(Dst);
3522 unsigned BinOpcode = MI.getOpcode();
3523
3524 Register FoldTrue, FoldFalse;
3525
3526 // We have a select-of-constants followed by a binary operator with a
3527 // constant. Eliminate the binop by pulling the constant math into the select.
3528 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
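 // Concretely (illustrative constants):
 //   add (select %cond, 3, 7), 5 --> select %cond, 8, 12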
3529 if (SelectOperand == 1) {
3530 // TODO: SelectionDAG verifies this actually constant folds before
3531 // committing to the combine.
3532
3533 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3534 FoldFalse =
3535 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3536 } else {
3537 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3538 FoldFalse =
3539 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3540 }
3541
3542 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3543 MI.eraseFromParent();
3544}
3545
3546std::optional<SmallVector<Register, 8>>
3547CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3548 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3549 // We want to detect if Root is part of a tree which represents a bunch
3550 // of loads being merged into a larger load. We'll try to recognize patterns
3551 // like, for example:
3552 //
3553 //   Reg   Reg
3554 //    \   /
3555 //     OR_1   Reg
3556 //       \    /
3557 //        OR_2
3558 //          \    Reg
3559 //           ..  /
3560 //           Root
3561 //
3562 //   Reg  Reg    Reg  Reg
3563 //     \  /        \  /
3564 //     OR_1        OR_2
3565 //        \        /
3566 //         \      /
3567 //           ...
3568 //           Root
3569 //
3570 // Each "Reg" may have been produced by a load + some arithmetic. This
3571 // function will save each of them.
3572 SmallVector<Register, 8> RegsToVisit;
3574
3575 // In the "worst" case, we're dealing with a load for each byte. So, there
3576 // are at most #bytes - 1 ORs.
3577 const unsigned MaxIter =
3578 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3579 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3580 if (Ors.empty())
3581 break;
3582 const MachineInstr *Curr = Ors.pop_back_val();
3583 Register OrLHS = Curr->getOperand(1).getReg();
3584 Register OrRHS = Curr->getOperand(2).getReg();
3585
3586 // In the combine, we want to eliminate the entire tree.
3587 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3588 return std::nullopt;
3589
3590 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3591 // something that may be a load + arithmetic.
3592 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3593 Ors.push_back(Or);
3594 else
3595 RegsToVisit.push_back(OrLHS);
3596 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3597 Ors.push_back(Or);
3598 else
3599 RegsToVisit.push_back(OrRHS);
3600 }
3601
3602 // We're going to try and merge each register into a wider power-of-2 type,
3603 // so we ought to have an even number of registers.
3604 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3605 return std::nullopt;
3606 return RegsToVisit;
3607}
3608
3609/// Helper function for findLoadOffsetsForLoadOrCombine.
3610///
3611/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3612/// and then moving that value into a specific byte offset.
3613///
3614/// e.g. x[i] << 24
3615///
3616/// \returns The load instruction and the byte offset it is moved into.
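/// For instance (illustrative), with MemSizeInBits == 8, a pattern like
/// (shl (zextload %p), 24) returns the load and byte position 24 / 8 == 3.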
3617static std::optional<std::pair<GZExtLoad *, int64_t>>
3618matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3619 const MachineRegisterInfo &MRI) {
3620 assert(MRI.hasOneNonDBGUse(Reg) &&
3621 "Expected Reg to only have one non-debug use?");
3622 Register MaybeLoad;
3623 int64_t Shift;
3624 if (!mi_match(Reg, MRI,
3625 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3626 Shift = 0;
3627 MaybeLoad = Reg;
3628 }
3629
3630 if (Shift % MemSizeInBits != 0)
3631 return std::nullopt;
3632
3633 // TODO: Handle other types of loads.
3634 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3635 if (!Load)
3636 return std::nullopt;
3637
3638 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3639 return std::nullopt;
3640
3641 return std::make_pair(Load, Shift / MemSizeInBits);
3642}
3643
3644std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3645CombinerHelper::findLoadOffsetsForLoadOrCombine(
3647 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3648
3649 // Each load found for the pattern. There should be one for each RegsToVisit.
3651
3652 // The lowest index used in any load. (The lowest "i" for each x[i].)
3653 int64_t LowestIdx = INT64_MAX;
3654
3655 // The load which uses the lowest index.
3656 GZExtLoad *LowestIdxLoad = nullptr;
3657
3658 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3659 SmallSet<int64_t, 8> SeenIdx;
3660
3661 // Ensure each load is in the same MBB.
3662 // TODO: Support multiple MachineBasicBlocks.
3663 MachineBasicBlock *MBB = nullptr;
3664 const MachineMemOperand *MMO = nullptr;
3665
3666 // Earliest instruction-order load in the pattern.
3667 GZExtLoad *EarliestLoad = nullptr;
3668
3669 // Latest instruction-order load in the pattern.
3670 GZExtLoad *LatestLoad = nullptr;
3671
3672 // Base pointer which every load should share.
3674
3675 // We want to find a load for each register. Each load should have some
3676 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3677 // track of the load which uses the lowest index. Later, we will check if we
3678 // can use its pointer in the final, combined load.
3679 for (auto Reg : RegsToVisit) {
3680 // Find the load, and the position that its value will end up at in the
3681 // final (possibly shifted) value.
3682 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3683 if (!LoadAndPos)
3684 return std::nullopt;
3685 GZExtLoad *Load;
3686 int64_t DstPos;
3687 std::tie(Load, DstPos) = *LoadAndPos;
3688
3689 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3690 // it is difficult to check for stores/calls/etc between loads.
3691 MachineBasicBlock *LoadMBB = Load->getParent();
3692 if (!MBB)
3693 MBB = LoadMBB;
3694 if (LoadMBB != MBB)
3695 return std::nullopt;
3696
3697 // Make sure that the MachineMemOperands of every seen load are compatible.
3698 auto &LoadMMO = Load->getMMO();
3699 if (!MMO)
3700 MMO = &LoadMMO;
3701 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3702 return std::nullopt;
3703
3704 // Find out what the base pointer and index for the load is.
3705 Register LoadPtr;
3706 int64_t Idx;
3707 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3708 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3709 LoadPtr = Load->getOperand(1).getReg();
3710 Idx = 0;
3711 }
3712
3713 // Don't combine things like a[i], a[i] -> a bigger load.
3714 if (!SeenIdx.insert(Idx).second)
3715 return std::nullopt;
3716
3717 // Every load must share the same base pointer; don't combine things like:
3718 //
3719 // a[i], b[i + 1] -> a bigger load.
3720 if (!BasePtr.isValid())
3721 BasePtr = LoadPtr;
3722 if (BasePtr != LoadPtr)
3723 return std::nullopt;
3724
3725 if (Idx < LowestIdx) {
3726 LowestIdx = Idx;
3727 LowestIdxLoad = Load;
3728 }
3729
3730 // Keep track of the byte offset that this load ends up at. If we have seen
3731 // the byte offset, then stop here. We do not want to combine:
3732 //
3733 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3734 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3735 return std::nullopt;
3736 Loads.insert(Load);
3737
3738 // Keep track of the position of the earliest/latest loads in the pattern.
3739 // We will check that there are no load fold barriers between them later
3740 // on.
3741 //
3742 // FIXME: Is there a better way to check for load fold barriers?
3743 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3744 EarliestLoad = Load;
3745 if (!LatestLoad || dominates(*LatestLoad, *Load))
3746 LatestLoad = Load;
3747 }
3748
3749 // We found a load for each register. Let's check if each load satisfies the
3750 // pattern.
3751 assert(Loads.size() == RegsToVisit.size() &&
3752 "Expected to find a load for each register?");
3753 assert(EarliestLoad != LatestLoad && EarliestLoad &&
3754 LatestLoad && "Expected at least two loads?");
3755
3756 // Check if there are any stores, calls, etc. between any of the loads. If
3757 // there are, then we can't safely perform the combine.
3758 //
3759 // MaxIter is chosen based off the (worst case) number of iterations it
3760 // typically takes to succeed in the LLVM test suite plus some padding.
3761 //
3762 // FIXME: Is there a better way to check for load fold barriers?
3763 const unsigned MaxIter = 20;
3764 unsigned Iter = 0;
3765 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3766 LatestLoad->getIterator())) {
3767 if (Loads.count(&MI))
3768 continue;
3769 if (MI.isLoadFoldBarrier())
3770 return std::nullopt;
3771 if (Iter++ == MaxIter)
3772 return std::nullopt;
3773 }
3774
3775 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3776}
3777
3779 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3780 assert(MI.getOpcode() == TargetOpcode::G_OR);
3781 MachineFunction &MF = *MI.getMF();
3782 // Assuming a little-endian target, transform:
3783 // s8 *a = ...
3784 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3785 // =>
3786 // s32 val = *((s32)a)
3787 //
3788 // s8 *a = ...
3789 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3790 // =>
3791 // s32 val = BSWAP(*((s32)a))
3792 Register Dst = MI.getOperand(0).getReg();
3793 LLT Ty = MRI.getType(Dst);
3794 if (Ty.isVector())
3795 return false;
3796
3797 // We need to combine at least two loads into this type. Since the smallest
3798 // possible load is into a byte, we need at least a 16-bit wide type.
3799 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3800 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3801 return false;
3802
3803 // Match a collection of non-OR instructions in the pattern.
3804 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3805 if (!RegsToVisit)
3806 return false;
3807
3808 // We have a collection of non-OR instructions. Figure out how wide each of
3809 // the small loads should be based off of the number of potential loads we
3810 // found.
3811 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3812 if (NarrowMemSizeInBits % 8 != 0)
3813 return false;
3814
3815 // Check if each register feeding into each OR is a load from the same
3816 // base pointer + some arithmetic.
3817 //
3818 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3819 //
3820 // Also verify that each of these ends up putting a[i] into the same memory
3821 // offset as a load into a wide type would.
3823 GZExtLoad *LowestIdxLoad, *LatestLoad;
3824 int64_t LowestIdx;
3825 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3826 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3827 if (!MaybeLoadInfo)
3828 return false;
3829 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3830
3831 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3832 // we found before, check if this corresponds to a big or little endian byte
3833 // pattern. If it does, then we can represent it using a load + possibly a
3834 // BSWAP.
3835 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3836 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3837 if (!IsBigEndian)
3838 return false;
3839 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3840 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3841 return false;
3842
3843 // Make sure that the load from the lowest index produces offset 0 in the
3844 // final value.
3845 //
3846 // This ensures that we won't combine something like this:
3847 //
3848 // load x[i] -> byte 2
3849 // load x[i+1] -> byte 0 ---> wide_load x[i]
3850 // load x[i+2] -> byte 1
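 // Concretely (illustrative), for a 4-load pattern the lowest memory index
 // must land at destination byte 0 for a little endian pattern, or at
 // destination byte 3 for a big endian pattern.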
3851 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3852 const unsigned ZeroByteOffset =
3853 *IsBigEndian
3854 ? bigEndianByteAt(NumLoadsInTy, 0)
3855 : littleEndianByteAt(NumLoadsInTy, 0);
3856 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3857 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3858 ZeroOffsetIdx->second != LowestIdx)
3859 return false;
3860
3861 // We will reuse the pointer from the load which ends up at byte offset 0. It
3862 // may not use index 0.
3863 Register Ptr = LowestIdxLoad->getPointerReg();
3864 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3865 LegalityQuery::MemDesc MMDesc(MMO);
3866 MMDesc.MemoryTy = Ty;
3868 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3869 return false;
3870 auto PtrInfo = MMO.getPointerInfo();
3871 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3872
3873 // Load must be allowed and fast on the target.
3875 auto &DL = MF.getDataLayout();
3876 unsigned Fast = 0;
3877 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3878 !Fast)
3879 return false;
3880
3881 MatchInfo = [=](MachineIRBuilder &MIB) {
3882 MIB.setInstrAndDebugLoc(*LatestLoad);
3883 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3884 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3885 if (NeedsBSwap)
3886 MIB.buildBSwap(Dst, LoadDst);
3887 };
3888 return true;
3889}
3890
3892 MachineInstr *&ExtMI) {
3893 auto &PHI = cast<GPhi>(MI);
3894 Register DstReg = PHI.getReg(0);
3895
3896 // TODO: Extending a vector may be expensive; don't do this until heuristics
3897 // are better.
3898 if (MRI.getType(DstReg).isVector())
3899 return false;
3900
3901 // Try to match a phi, whose only use is an extend.
3902 if (!MRI.hasOneNonDBGUse(DstReg))
3903 return false;
3904 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3905 switch (ExtMI->getOpcode()) {
3906 case TargetOpcode::G_ANYEXT:
3907 return true; // G_ANYEXT is usually free.
3908 case TargetOpcode::G_ZEXT:
3909 case TargetOpcode::G_SEXT:
3910 break;
3911 default:
3912 return false;
3913 }
3914
3915 // If the target is likely to fold this extend away, don't propagate.
3917 return false;
3918
3919 // We don't want to propagate the extends unless there's a good chance that
3920 // they'll be optimized in some way.
3921 // Collect the unique incoming values.
3923 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3924 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
3925 switch (DefMI->getOpcode()) {
3926 case TargetOpcode::G_LOAD:
3927 case TargetOpcode::G_TRUNC:
3928 case TargetOpcode::G_SEXT:
3929 case TargetOpcode::G_ZEXT:
3930 case TargetOpcode::G_ANYEXT:
3931 case TargetOpcode::G_CONSTANT:
3932 InSrcs.insert(DefMI);
3933 // Don't try to propagate if there are too many places to create new
3934 // extends, chances are it'll increase code size.
3935 if (InSrcs.size() > 2)
3936 return false;
3937 break;
3938 default:
3939 return false;
3940 }
3941 }
3942 return true;
3943}
3944
3946 MachineInstr *&ExtMI) {
3947 auto &PHI = cast<GPhi>(MI);
3948 Register DstReg = ExtMI->getOperand(0).getReg();
3949 LLT ExtTy = MRI.getType(DstReg);
3950
3951 // Propagate the extension into each incoming register's block.
3952 // Use a SetVector here because PHIs can have duplicate edges, and we want
3953 // deterministic iteration order.
3956 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
3957 auto SrcReg = PHI.getIncomingValue(I);
3958 auto *SrcMI = MRI.getVRegDef(SrcReg);
3959 if (!SrcMIs.insert(SrcMI))
3960 continue;
3961
3962 // Build an extend after each src inst.
3963 auto *MBB = SrcMI->getParent();
3964 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3965 if (InsertPt != MBB->end() && InsertPt->isPHI())
3966 InsertPt = MBB->getFirstNonPHI();
3967
3968 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3969 Builder.setDebugLoc(MI.getDebugLoc());
3970 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
3971 OldToNewSrcMap[SrcMI] = NewExt;
3972 }
3973
3974 // Create a new phi with the extended inputs.
3976 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3977 NewPhi.addDef(DstReg);
3978 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
3979 if (!MO.isReg()) {
3980 NewPhi.addMBB(MO.getMBB());
3981 continue;
3982 }
3983 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3984 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3985 }
3986 Builder.insertInstr(NewPhi);
3987 ExtMI->eraseFromParent();
3988}
3989
3991 Register &Reg) {
3992 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3993 // If we have a constant index, look for a G_BUILD_VECTOR source
3994 // and find the source register that the index maps to.
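 // E.g. (illustrative):
 //   %v:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d
 //   %e:_(s32) = G_EXTRACT_VECTOR_ELT %v, 2
 // can be replaced by %c.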
3995 Register SrcVec = MI.getOperand(1).getReg();
3996 LLT SrcTy = MRI.getType(SrcVec);
3997
3998 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3999 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4000 return false;
4001
4002 unsigned VecIdx = Cst->Value.getZExtValue();
4003
4004 // Check if we have a build_vector or build_vector_trunc with an optional
4005 // trunc in front.
4006 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4007 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4008 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4009 }
4010
4011 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4012 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4013 return false;
4014
4015 EVT Ty(getMVTForLLT(SrcTy));
4016 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4017 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4018 return false;
4019
4020 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4021 return true;
4022}
4023
4025 Register &Reg) {
4026 // Check the type of the register, since it may have come from a
4027 // G_BUILD_VECTOR_TRUNC.
4028 LLT ScalarTy = MRI.getType(Reg);
4029 Register DstReg = MI.getOperand(0).getReg();
4030 LLT DstTy = MRI.getType(DstReg);
4031
4033 if (ScalarTy != DstTy) {
4034 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4035 Builder.buildTrunc(DstReg, Reg);
4036 MI.eraseFromParent();
4037 return;
4038 }
4040}
4041
4044 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4045 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4046 // This combine tries to find build_vector's which have every source element
4047 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4048 // the masked load scalarization are run late in the pipeline. There's already
4049 // a combine for a similar pattern starting from the extract, but that
4050 // doesn't attempt to do it if there are multiple uses of the build_vector,
4051 // which in this case is true. Starting the combine from the build_vector
4052 // feels more natural than trying to find sibling nodes of extracts.
4053 // E.g.
4054 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4055 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4056 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4057 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4058 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4059 // ==>
4060 // replace ext{1,2,3,4} with %s{1,2,3,4}
4061
4062 Register DstReg = MI.getOperand(0).getReg();
4063 LLT DstTy = MRI.getType(DstReg);
4064 unsigned NumElts = DstTy.getNumElements();
4065
4066 SmallBitVector ExtractedElts(NumElts);
4067 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4068 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4069 return false;
4070 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4071 if (!Cst)
4072 return false;
4073 unsigned Idx = Cst->getZExtValue();
4074 if (Idx >= NumElts)
4075 return false; // Out of range.
4076 ExtractedElts.set(Idx);
4077 SrcDstPairs.emplace_back(
4078 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4079 }
4080 // Match if every element was extracted.
4081 return ExtractedElts.all();
4082}
4083
4086 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
4087 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4088 for (auto &Pair : SrcDstPairs) {
4089 auto *ExtMI = Pair.second;
4090 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4091 ExtMI->eraseFromParent();
4092 }
4093 MI.eraseFromParent();
4094}
4095
4097 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4099 MatchInfo(Builder);
4100 MI.eraseFromParent();
4101}
4102
4104 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4106 MatchInfo(Builder);
4107}
4108
4110 BuildFnTy &MatchInfo) {
4111 assert(MI.getOpcode() == TargetOpcode::G_OR);
4112
4113 Register Dst = MI.getOperand(0).getReg();
4114 LLT Ty = MRI.getType(Dst);
4115 unsigned BitWidth = Ty.getScalarSizeInBits();
4116
4117 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4118 unsigned FshOpc = 0;
4119
4120 // Match (or (shl ...), (lshr ...)).
4121 if (!mi_match(Dst, MRI,
4122 // m_GOr() handles the commuted version as well.
4123 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4124 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4125 return false;
4126
4127 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4128 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
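 // E.g. (illustrative) on s32: (or (shl x, 24), (lshr y, 8)) -> (fshr x, y, 8),
 // since 24 + 8 == 32.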
4129 int64_t CstShlAmt, CstLShrAmt;
4130 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4131 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4132 CstShlAmt + CstLShrAmt == BitWidth) {
4133 FshOpc = TargetOpcode::G_FSHR;
4134 Amt = LShrAmt;
4135
4136 } else if (mi_match(LShrAmt, MRI,
4138 ShlAmt == Amt) {
4139 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4140 FshOpc = TargetOpcode::G_FSHL;
4141
4142 } else if (mi_match(ShlAmt, MRI,
4144 LShrAmt == Amt) {
4145 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4146 FshOpc = TargetOpcode::G_FSHR;
4147
4148 } else {
4149 return false;
4150 }
4151
4152 LLT AmtTy = MRI.getType(Amt);
4153 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
4154 return false;
4155
4156 MatchInfo = [=](MachineIRBuilder &B) {
4157 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4158 };
4159 return true;
4160}
4161
4162/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
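/// For instance (illustrative), G_FSHL %x, %x, %amt funnel-shifts %x with
/// itself, which is a rotate left, so it can become G_ROTL %x, %amt when that
/// is legal on the target.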
4164 unsigned Opc = MI.getOpcode();
4165 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4166 Register X = MI.getOperand(1).getReg();
4167 Register Y = MI.getOperand(2).getReg();
4168 if (X != Y)
4169 return false;
4170 unsigned RotateOpc =
4171 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4172 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4173}
4174
4176 unsigned Opc = MI.getOpcode();
4177 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4178 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4180 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4181 : TargetOpcode::G_ROTR));
4182 MI.removeOperand(2);
4184}
4185
4186// Fold (rot x, c) -> (rot x, c % BitSize)
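// E.g. (illustrative) on s32: (rotl x, 37) -> (rotl x, 5), since rotation is
// periodic in the bit width and 37 % 32 == 5.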
4188 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4189 MI.getOpcode() == TargetOpcode::G_ROTR);
4190 unsigned Bitsize =
4191 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4192 Register AmtReg = MI.getOperand(2).getReg();
4193 bool OutOfRange = false;
4194 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4195 if (auto *CI = dyn_cast<ConstantInt>(C))
4196 OutOfRange |= CI->getValue().uge(Bitsize);
4197 return true;
4198 };
4199 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4200}
4201
4203 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4204 MI.getOpcode() == TargetOpcode::G_ROTR);
4205 unsigned Bitsize =
4206 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4208 Register Amt = MI.getOperand(2).getReg();
4209 LLT AmtTy = MRI.getType(Amt);
4210 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4211 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4213 MI.getOperand(2).setReg(Amt);
4215}
4216
4218 int64_t &MatchInfo) {
4219 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4220 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4221 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4222 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4223 std::optional<bool> KnownVal;
4224 switch (Pred) {
4225 default:
4226 llvm_unreachable("Unexpected G_ICMP predicate?");
4227 case CmpInst::ICMP_EQ:
4228 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4229 break;
4230 case CmpInst::ICMP_NE:
4231 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4232 break;
4233 case CmpInst::ICMP_SGE:
4234 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4235 break;
4236 case CmpInst::ICMP_SGT:
4237 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4238 break;
4239 case CmpInst::ICMP_SLE:
4240 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4241 break;
4242 case CmpInst::ICMP_SLT:
4243 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4244 break;
4245 case CmpInst::ICMP_UGE:
4246 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4247 break;
4248 case CmpInst::ICMP_UGT:
4249 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4250 break;
4251 case CmpInst::ICMP_ULE:
4252 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4253 break;
4254 case CmpInst::ICMP_ULT:
4255 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4256 break;
4257 }
4258 if (!KnownVal)
4259 return false;
4260 MatchInfo =
4261 *KnownVal
4263 /*IsVector = */
4264 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4265 /* IsFP = */ false)
4266 : 0;
4267 return true;
4268}
4269
4271 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4272 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4273 // Given:
4274 //
4275 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4276 // %cmp = G_ICMP ne %x, 0
4277 //
4278 // Or:
4279 //
4280 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4281 // %cmp = G_ICMP eq %x, 1
4282 //
4283 // We can replace %cmp with %x assuming true is 1 on the target.
4284 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4285 if (!CmpInst::isEquality(Pred))
4286 return false;
4287 Register Dst = MI.getOperand(0).getReg();
4288 LLT DstTy = MRI.getType(Dst);
4290 /* IsFP = */ false) != 1)
4291 return false;
4292 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4293 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4294 return false;
4295 Register LHS = MI.getOperand(2).getReg();
4296 auto KnownLHS = KB->getKnownBits(LHS);
4297 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4298 return false;
4299 // Make sure replacing Dst with the LHS is a legal operation.
4300 LLT LHSTy = MRI.getType(LHS);
4301 unsigned LHSSize = LHSTy.getSizeInBits();
4302 unsigned DstSize = DstTy.getSizeInBits();
4303 unsigned Op = TargetOpcode::COPY;
4304 if (DstSize != LHSSize)
4305 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4306 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4307 return false;
4308 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4309 return true;
4310}
4311
4312// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
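// For example (illustrative masks): (and (or x, 0xF0), 0x0F) -> (and x, 0x0F),
// because 0xF0 & 0x0F == 0, so the or cannot affect any bit kept by the mask.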
4314 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4315 assert(MI.getOpcode() == TargetOpcode::G_AND);
4316
4317 // Ignore vector types to simplify matching the two constants.
4318 // TODO: do this for vectors and scalars via a demanded bits analysis.
4319 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4320 if (Ty.isVector())
4321 return false;
4322
4323 Register Src;
4324 Register AndMaskReg;
4325 int64_t AndMaskBits;
4326 int64_t OrMaskBits;
4327 if (!mi_match(MI, MRI,
4328 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4329 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4330 return false;
4331
4332 // Check if OrMask could turn on any bits in Src.
4333 if (AndMaskBits & OrMaskBits)
4334 return false;
4335
4336 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4338 // Canonicalize the result to have the constant on the RHS.
4339 if (MI.getOperand(1).getReg() == AndMaskReg)
4340 MI.getOperand(2).setReg(AndMaskReg);
4341 MI.getOperand(1).setReg(Src);
4343 };
4344 return true;
4345}
4346
4347/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
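/// For instance (illustrative) on s32:
///   %sh = G_ASHR %x, 4
///   %d  = G_SEXT_INREG %sh, 8
/// can become G_SBFX %x, 4, 8 (extract 8 bits starting at bit 4, then
/// sign-extend), provided 4 + 8 does not exceed the scalar width.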
4349 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4350 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4351 Register Dst = MI.getOperand(0).getReg();
4352 Register Src = MI.getOperand(1).getReg();
4353 LLT Ty = MRI.getType(Src);
4355 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4356 return false;
4357 int64_t Width = MI.getOperand(2).getImm();
4358 Register ShiftSrc;
4359 int64_t ShiftImm;
4360 if (!mi_match(
4361 Src, MRI,
4362 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4363 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4364 return false;
4365 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4366 return false;
4367
4368 MatchInfo = [=](MachineIRBuilder &B) {
4369 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4370 auto Cst2 = B.buildConstant(ExtractTy, Width);
4371 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4372 };
4373 return true;
4374}
4375
4376/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
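/// For instance (illustrative) on s32: (and (lshr %x, 8), 0xFF) can become
/// G_UBFX %x, 8, 8, since 0xFF keeps exactly the 8 low bits of the shifted
/// value.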
4378 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4379 assert(MI.getOpcode() == TargetOpcode::G_AND);
4380 Register Dst = MI.getOperand(0).getReg();
4381 LLT Ty = MRI.getType(Dst);
4383 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4384 return false;
4385
4386 int64_t AndImm, LSBImm;
4387 Register ShiftSrc;
4388 const unsigned Size = Ty.getScalarSizeInBits();
4389 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4390 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4391 m_ICst(AndImm))))
4392 return false;
4393
4394 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
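 // E.g. (illustrative): 0x0F & 0x10 == 0, so 0x0F qualifies, while
 // 0x0A & 0x0B != 0, so a mask with holes such as 0x0A is rejected.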
4395 auto MaybeMask = static_cast<uint64_t>(AndImm);
4396 if (MaybeMask & (MaybeMask + 1))
4397 return false;
4398
4399 // LSB must fit within the register.
4400 if (static_cast<uint64_t>(LSBImm) >= Size)
4401 return false;
4402
4403 uint64_t Width = APInt(Size, AndImm).countr_one();
4404 MatchInfo = [=](MachineIRBuilder &B) {
4405 auto WidthCst = B.buildConstant(ExtractTy, Width);
4406 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4407 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4408 };
4409 return true;
4410}
4411
4413 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4414 const unsigned Opcode = MI.getOpcode();
4415 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4416
4417 const Register Dst = MI.getOperand(0).getReg();
4418
4419 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4420 ? TargetOpcode::G_SBFX
4421 : TargetOpcode::G_UBFX;
4422
4423 // Check if the type we would use for the extract is legal
4424 LLT Ty = MRI.getType(Dst);
4426 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4427 return false;
4428
4429 Register ShlSrc;
4430 int64_t ShrAmt;
4431 int64_t ShlAmt;
4432 const unsigned Size = Ty.getScalarSizeInBits();
4433
4434 // Try to match shr (shl x, c1), c2
4435 if (!mi_match(Dst, MRI,
4436 m_BinOp(Opcode,
4437 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4438 m_ICst(ShrAmt))))
4439 return false;
4440
4441 // Make sure that the shift sizes can fit a bitfield extract
4442 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4443 return false;
4444
4445 // Skip this combine if the G_SEXT_INREG combine could handle it
4446 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4447 return false;
4448
4449 // Calculate start position and width of the extract
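 // E.g. (illustrative) on s32: (lshr (shl %x, 4), 12) keeps bits [8, 28) of
 // %x, so Pos = 12 - 4 = 8 and Width = 32 - 12 = 20.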
4450 const int64_t Pos = ShrAmt - ShlAmt;
4451 const int64_t Width = Size - ShrAmt;
4452
4453 MatchInfo = [=](MachineIRBuilder &B) {
4454 auto WidthCst = B.buildConstant(ExtractTy, Width);
4455 auto PosCst = B.buildConstant(ExtractTy, Pos);
4456 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4457 };
4458 return true;
4459}
4460
4462 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4463 const unsigned Opcode = MI.getOpcode();
4464 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4465
4466 const Register Dst = MI.getOperand(0).getReg();
4467 LLT Ty = MRI.getType(Dst);
4469 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4470 return false;
4471
4472 // Try to match shr (and x, c1), c2
4473 Register AndSrc;
4474 int64_t ShrAmt;
4475 int64_t SMask;
4476 if (!mi_match(Dst, MRI,
4477 m_BinOp(Opcode,
4478 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4479 m_ICst(ShrAmt))))
4480 return false;
4481
4482 const unsigned Size = Ty.getScalarSizeInBits();
4483 if (ShrAmt < 0 || ShrAmt >= Size)
4484 return false;
4485
4486 // If the shift subsumes the mask, emit the 0 directly.
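 // E.g. (illustrative) on s32: (lshr (and %x, 0xFF), 8) is always 0, because
 // 0xFF >> 8 == 0.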
4487 if (0 == (SMask >> ShrAmt)) {
4488 MatchInfo = [=](MachineIRBuilder &B) {
4489 B.buildConstant(Dst, 0);
4490 };
4491 return true;
4492 }
4493
4494 // Check that ubfx can do the extraction, with no holes in the mask.
4495 uint64_t UMask = SMask;
4496 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
4497 UMask &= maskTrailingOnes<uint64_t>(Size);
4498 if (!isMask_64(UMask))
4499 return false;
4500
4501 // Calculate start position and width of the extract.
4502 const int64_t Pos = ShrAmt;
4503 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4504
4505 // It's preferable to keep the shift, rather than form G_SBFX.
4506 // TODO: remove the G_AND via demanded bits analysis.
4507 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4508 return false;
4509
4510 MatchInfo = [=](MachineIRBuilder &B) {
4511 auto WidthCst = B.buildConstant(ExtractTy, Width);
4512 auto PosCst = B.buildConstant(ExtractTy, Pos);
4513 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4514 };
4515 return true;
4516}
4517
4518bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4519 MachineInstr &MI) {
4520 auto &PtrAdd = cast<GPtrAdd>(MI);
4521
4522 Register Src1Reg = PtrAdd.getBaseReg();
4523 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4524 if (!Src1Def)
4525 return false;
4526
4527 Register Src2Reg = PtrAdd.getOffsetReg();
4528
4529 if (MRI.hasOneNonDBGUse(Src1Reg))
4530 return false;
4531
4532 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4533 if (!C1)
4534 return false;
4535 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4536 if (!C2)
4537 return false;
4538
4539 const APInt &C1APIntVal = *C1;
4540 const APInt &C2APIntVal = *C2;
4541 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4542
4543 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4544 // This combine may end up running before ptrtoint/inttoptr combines
4545 // manage to eliminate redundant conversions, so try to look through them.
4546 MachineInstr *ConvUseMI = &UseMI;
4547 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4548 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4549 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4550 Register DefReg = ConvUseMI->getOperand(0).getReg();
4551 if (!MRI.hasOneNonDBGUse(DefReg))
4552 break;
4553 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4554 ConvUseOpc = ConvUseMI->getOpcode();
4555 }
4556 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4557 if (!LdStMI)
4558 continue;
4559 // Is x[offset2] already not a legal addressing mode? If so then
4560 // reassociating the constants breaks nothing (we test offset2 because
4561 // that's the one we hope to fold into the load or store).
4563 AM.HasBaseReg = true;
4564 AM.BaseOffs = C2APIntVal.getSExtValue();
4565 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4566 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4567 PtrAdd.getMF()->getFunction().getContext());
4568 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4569 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4570 AccessTy, AS))
4571 continue;
4572
4573 // Would x[offset1+offset2] still be a legal addressing mode?
4574 AM.BaseOffs = CombinedValue;
4575 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4576 AccessTy, AS))
4577 return true;
4578 }
4579
4580 return false;
4581}
4582
4584 MachineInstr *RHS,
4585 BuildFnTy &MatchInfo) {
4586 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4587 Register Src1Reg = MI.getOperand(1).getReg();
4588 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4589 return false;
4590 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4591 if (!C2)
4592 return false;
4593
4594 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4595 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4596
4597 auto NewBase =
4598 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4600 MI.getOperand(1).setReg(NewBase.getReg(0));
4601 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4603 };
4604 return !reassociationCanBreakAddressingModePattern(MI);
4605}
4606
4608 MachineInstr *LHS,
4609 MachineInstr *RHS,
4610 BuildFnTy &MatchInfo) {
4611 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4612 // if and only if (G_PTR_ADD X, C) has one use.
4613 Register LHSBase;
4614 std::optional<ValueAndVReg> LHSCstOff;
4615 if (!mi_match(MI.getBaseReg(), MRI,
4616 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
4617 return false;
4618
4619 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
4620 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4621 // When we change LHSPtrAdd's offset register we might cause it to use a reg
4622 // before its def. Sink the instruction to just before the outer PTR_ADD to
4623 // ensure this doesn't happen.
4624 LHSPtrAdd->moveBefore(&MI);
4625 Register RHSReg = MI.getOffsetReg();
4626 // Setting the vreg directly may cause a type mismatch if it comes from an extend/trunc; build a constant of the right type instead.
4627 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
4629 MI.getOperand(2).setReg(NewCst.getReg(0));
4631 Observer.changingInstr(*LHSPtrAdd);
4632 LHSPtrAdd->getOperand(2).setReg(RHSReg);
4633 Observer.changedInstr(*LHSPtrAdd);
4634 };
4635 return !reassociationCanBreakAddressingModePattern(MI);
4636}
4637
4639 MachineInstr *LHS,
4640 MachineInstr *RHS,
4641 BuildFnTy &MatchInfo) {
4642 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4643 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
4644 if (!LHSPtrAdd)
4645 return false;
4646
4647 Register Src2Reg = MI.getOperand(2).getReg();
4648 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
4649 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
4650 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
4651 if (!C1)
4652 return false;
4653 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4654 if (!C2)
4655 return false;
4656
4657 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4658 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4660 MI.getOperand(1).setReg(LHSSrc1);
4661 MI.getOperand(2).setReg(NewCst.getReg(0));
4663 };
4664 return !reassociationCanBreakAddressingModePattern(MI);
4665}
4666
4668 BuildFnTy &MatchInfo) {
4669 auto &PtrAdd = cast<GPtrAdd>(MI);
4670 // We're trying to match a few pointer computation patterns here for
4671 // re-association opportunities.
4672 // 1) Isolating a constant operand to be on the RHS, e.g.:
4673 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4674 //
4675 // 2) Folding two constants in each sub-tree as long as such folding
4676 // doesn't break a legal addressing mode.
4677 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4678 //
4679 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
4680 // G_PTR_ADD(G_PTR_ADD(X, C), Y) -> G_PTR_ADD(G_PTR_ADD(X, Y), C)
4681 // iff (G_PTR_ADD X, C) has one use.
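 // E.g. (illustrative) for case 2:
 //   %p1:_(p0) = G_PTR_ADD %base, 16
 //   %p2:_(p0) = G_PTR_ADD %p1, 4
 // can become %p2:_(p0) = G_PTR_ADD %base, 20, provided offset 20 is still a
 // legal addressing mode for %p2's users.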
4682 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
4683 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
4684
4685 // Try to match example 2.
4686 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
4687 return true;
4688
4689 // Try to match example 3.
4690 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
4691 return true;
4692
4693 // Try to match example 1.
4694 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
4695 return true;
4696
4697 return false;
4698}
4700 Register OpLHS, Register OpRHS,
4701 BuildFnTy &MatchInfo) {
4702 LLT OpRHSTy = MRI.getType(OpRHS);
4703 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
4704
4705 if (OpLHSDef->getOpcode() != Opc)
4706 return false;
4707
4708 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
4709 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
4710 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
4711
4712 // If the inner op is (X op C), pull the constant out so it can be folded with
4713 // other constants in the expression tree. Folding is not guaranteed so we
4714 // might have (C1 op C2). In that case do not pull a constant out because it
4715 // won't help and can lead to infinite loops.
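 // E.g. (illustrative): (add (add %x, 10), 20) -> (add %x, (add 10, 20)),
 // which later constant folding can reduce to (add %x, 30).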
4718 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
4719 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
4720 MatchInfo = [=](MachineIRBuilder &B) {
4721 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
4722 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
4723 };
4724 return true;
4725 }
4726 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
4727 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
4728 // iff (op x, c1) has one use
4729 MatchInfo = [=](MachineIRBuilder &B) {
4730 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
4731 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
4732 };
4733 return true;
4734 }
4735 }
4736
4737 return false;
4738}
4739
4741 BuildFnTy &MatchInfo) {
4742 // We don't check if the reassociation will break a legal addressing mode
4743 // here since pointer arithmetic is handled by G_PTR_ADD.
4744 unsigned Opc = MI.getOpcode();
4745 Register DstReg = MI.getOperand(0).getReg();
4746 Register LHSReg = MI.getOperand(1).getReg();
4747 Register RHSReg = MI.getOperand(2).getReg();
4748
4749 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
4750 return true;
4751 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
4752 return true;
4753 return false;
4754}
4755
4757 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4758 Register SrcOp = MI.getOperand(1).getReg();
4759
4760 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
4761 MatchInfo = *MaybeCst;
4762 return true;
4763 }
4764
4765 return false;
4766}
4767
4769 Register Op1 = MI.getOperand(1).getReg();
4770 Register Op2 = MI.getOperand(2).getReg();
4771 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4772 if (!MaybeCst)
4773 return false;
4774 MatchInfo = *MaybeCst;
4775 return true;
4776}
4777
4779 Register Op1 = MI.getOperand(1).getReg();
4780 Register Op2 = MI.getOperand(2).getReg();
4781 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
4782 if (!MaybeCst)
4783 return false;
4784 MatchInfo =
4785 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
4786 return true;
4787}
4788
4790 ConstantFP *&MatchInfo) {
4791 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
4792 MI.getOpcode() == TargetOpcode::G_FMAD);
4793 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
4794
4795 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
4796 if (!Op3Cst)
4797 return false;
4798
4799 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
4800 if (!Op2Cst)
4801 return false;
4802
4803 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
4804 if (!Op1Cst)
4805 return false;
4806
4807 APFloat Op1F = Op1Cst->getValueAPF();
4808 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
4810 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
4811 return true;
4812}
4813
4815 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4816 // Look for a binop feeding into an AND with a mask:
4817 //
4818 // %add = G_ADD %lhs, %rhs
4819 // %and = G_AND %add, 000...11111111
4820 //
4821 // Check if it's possible to perform the binop at a narrower width and zext
4822 // back to the original width like so:
4823 //
4824 // %narrow_lhs = G_TRUNC %lhs
4825 // %narrow_rhs = G_TRUNC %rhs
4826 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
4827 // %new_add = G_ZEXT %narrow_add
4828 // %and = G_AND %new_add, 000...11111111
4829 //
4830 // This can allow later combines to eliminate the G_AND if it turns out
4831 // that the mask is irrelevant.
4832 assert(MI.getOpcode() == TargetOpcode::G_AND);
4833 Register Dst = MI.getOperand(0).getReg();
4834 Register AndLHS = MI.getOperand(1).getReg();
4835 Register AndRHS = MI.getOperand(2).getReg();
4836 LLT WideTy = MRI.getType(Dst);
4837
4838 // If the potential binop has more than one use, then it's possible that one
4839 // of those uses will need its full width.
4840 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
4841 return false;
4842
4843 // Check if the LHS feeding the AND is impacted by the high bits that we're
4844 // masking out.
4845 //
4846 // e.g. for 64-bit x, y:
4847 //
4848 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
4849 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
4850 if (!LHSInst)
4851 return false;
4852 unsigned LHSOpc = LHSInst->getOpcode();
4853 switch (LHSOpc) {
4854 default:
4855 return false;
4856 case TargetOpcode::G_ADD:
4857 case TargetOpcode::G_SUB:
4858 case TargetOpcode::G_MUL:
4859 case TargetOpcode::G_AND:
4860 case TargetOpcode::G_OR:
4861 case TargetOpcode::G_XOR:
4862 break;
4863 }
4864
4865 // Find the mask on the RHS.
4866 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
4867 if (!Cst)
4868 return false;
4869 auto Mask = Cst->Value;
4870 if (!Mask.isMask())
4871 return false;
4872
4873 // No point in combining if there's nothing to truncate.
4874 unsigned NarrowWidth = Mask.countr_one();
4875 if (NarrowWidth == WideTy.getSizeInBits())
4876 return false;
4877 LLT NarrowTy = LLT::scalar(NarrowWidth);
4878
4879 // Check if adding the zext + truncates could be harmful.
4880 auto &MF = *MI.getMF();
4881 const auto &TLI = getTargetLowering();
4882 LLVMContext &Ctx = MF.getFunction().getContext();
4883 auto &DL = MF.getDataLayout();
4884 if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
4885 !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
4886 return false;
4887 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
4888 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
4889 return false;
4890 Register BinOpLHS = LHSInst->getOperand(1).getReg();
4891 Register BinOpRHS = LHSInst->getOperand(2).getReg();
4892 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4893 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
4894 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
4895 auto NarrowBinOp =
4896 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
4897 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
4899 MI.getOperand(1).setReg(Ext.getReg(0));
4901 };
4902 return true;
4903}
4904
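// Transform (G_UMULO x, 2) -> (G_UADDO x, x) and (G_SMULO x, 2) ->
// (G_SADDO x, x): multiplying by two overflows exactly when adding the value
// to itself does.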
4906 unsigned Opc = MI.getOpcode();
4907 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
4908
4909 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
4910 return false;
4911
4912 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4914 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
4915 : TargetOpcode::G_SADDO;
4916 MI.setDesc(Builder.getTII().get(NewOpc));
4917 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
4919 };
4920 return true;
4921}
4922
4924 // (G_*MULO x, 0) -> 0 + no carry out
4925 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
4926 MI.getOpcode() == TargetOpcode::G_SMULO);
4927 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
4928 return false;
4929 Register Dst = MI.getOperand(0).getReg();
4930 Register Carry = MI.getOperand(1).getReg();
4933 return false;
4934 MatchInfo = [=](MachineIRBuilder &B) {
4935 B.buildConstant(Dst, 0);
4936 B.buildConstant(Carry, 0);
4937 };
4938 return true;
4939}
4940
4942 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
4943 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
4944 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
4945 MI.getOpcode() == TargetOpcode::G_SADDE ||
4946 MI.getOpcode() == TargetOpcode::G_USUBE ||
4947 MI.getOpcode() == TargetOpcode::G_SSUBE);
4948 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
4949 return false;
4950 MatchInfo = [&](MachineIRBuilder &B) {
4951 unsigned NewOpcode;
4952 switch (MI.getOpcode()) {
4953 case TargetOpcode::G_UADDE:
4954 NewOpcode = TargetOpcode::G_UADDO;
4955 break;
4956 case TargetOpcode::G_SADDE:
4957 NewOpcode = TargetOpcode::G_SADDO;
4958 break;
4959 case TargetOpcode::G_USUBE:
4960 NewOpcode = TargetOpcode::G_USUBO;
4961 break;
4962 case TargetOpcode::G_SSUBE:
4963 NewOpcode = TargetOpcode::G_SSUBO;
4964 break;
4965 }
4967 MI.setDesc(B.getTII().get(NewOpcode));
4968 MI.removeOperand(4);
4970 };
4971 return true;
4972}
4973
4975 BuildFnTy &MatchInfo) {
4976 assert(MI.getOpcode() == TargetOpcode::G_SUB);
4977 Register Dst = MI.getOperand(0).getReg();
4978 // (x + y) - z -> x (if y == z)
4979 // (x + y) - z -> y (if x == z)
4980 Register X, Y, Z;
4981 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
4982 Register ReplaceReg;
4983 int64_t CstX, CstY;
4984 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
4986 ReplaceReg = X;
4987 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
4989 ReplaceReg = Y;
4990 if (ReplaceReg) {
4991 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
4992 return true;
4993 }
4994 }
4995
4996 // x - (y + z) -> 0 - y (if x == z)
4997 // x - (y + z) -> 0 - z (if x == y)
4998 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
4999 Register ReplaceReg;
5000 int64_t CstX;
5001 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5003 ReplaceReg = Y;
5004 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5006 ReplaceReg = Z;
5007 if (ReplaceReg) {
5008 MatchInfo = [=](MachineIRBuilder &B) {
5009 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5010 B.buildSub(Dst, Zero, ReplaceReg);
5011 };
5012 return true;
5013 }
5014 }
5015 return false;
5016}
5017
5019 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5020 auto &UDiv = cast<GenericMachineInstr>(MI);
5021 Register Dst = UDiv.getReg(0);
5022 Register LHS = UDiv.getReg(1);
5023 Register RHS = UDiv.getReg(2);
5024 LLT Ty = MRI.getType(Dst);
5025 LLT ScalarTy = Ty.getScalarType();
5026 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
5028 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5029 auto &MIB = Builder;
5031
5032 bool UseNPQ = false;
5033 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
5034
5035 auto BuildUDIVPattern = [&](const Constant *C) {
5036 auto *CI = cast<ConstantInt>(C);
5037 const APInt &Divisor = CI->getValue();
5038
5039 bool SelNPQ = false;
5040 APInt Magic(Divisor.getBitWidth(), 0);
5041 unsigned PreShift = 0, PostShift = 0;
5042
5043 // Magic algorithm doesn't work for division by 1. We need to emit a select
5044 // at the end.
5045 // TODO: Use undef values for divisor of 1.
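 // E.g. (illustrative) for a 32-bit unsigned divide by 3: the magic factor
 // is 0xAAAAAAAB with a post-shift of 1, so x / 3 == (umulh x, 0xAAAAAAAB) >> 1.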
5046 if (!Divisor.isOne()) {
5049
5050 Magic = std::move(magics.Magic);
5051
5052 assert(magics.PreShift < Divisor.getBitWidth() &&
5053 "We shouldn't generate an undefined shift!");
5054 assert(magics.PostShift < Divisor.getBitWidth() &&
5055 "We shouldn't generate an undefined shift!");
5056 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5057 PreShift = magics.PreShift;
5058 PostShift = magics.PostShift;
5059 SelNPQ = magics.IsAdd;
5060 }
5061
5062 PreShifts.push_back(
5063 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5064 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
5065 NPQFactors.push_back(
5066 MIB.buildConstant(ScalarTy,
5067 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5068 : APInt::getZero(EltBits))
5069 .getReg(0));
5070 PostShifts.push_back(
5071 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5072 UseNPQ |= SelNPQ;
5073 return true;
5074 };
5075
5076 // Collect the shifts/magic values from each element.
5077 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5078 (void)Matched;
5079 assert(Matched && "Expected unary predicate match to succeed");
5080
5081 Register PreShift, PostShift, MagicFactor, NPQFactor;
5082 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5083 if (RHSDef) {
5084 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5085 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5086 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5087 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5088 } else {
5090 "Non-build_vector operation should have been a scalar");
5091 PreShift = PreShifts[0];
5092 MagicFactor = MagicFactors[0];
5093 PostShift = PostShifts[0];
5094 }
5095
5096 Register Q = LHS;
5097 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5098
5099 // Multiply the numerator (operand 0) by the magic value.
5100 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5101
5102 if (UseNPQ) {
5103 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5104
5105 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5106 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5107 if (Ty.isVector())
5108 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5109 else
5110 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5111
5112 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5113 }
5114
5115 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
5116 auto One = MIB.buildConstant(Ty, 1);
5117 auto IsOne = MIB.buildICmp(
5119 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5120 return MIB.buildSelect(Ty, IsOne, LHS, Q);
5121}
5122
5124 assert(MI.getOpcode() == TargetOpcode::G_UDIV);
5125 Register Dst = MI.getOperand(0).getReg();
5126 Register RHS = MI.getOperand(2).getReg();
5127 LLT DstTy = MRI.getType(Dst);
5128 auto *RHSDef = MRI.getVRegDef(RHS);
5129 if (!isConstantOrConstantVector(*RHSDef, MRI))
5130 return false;
5131
5132 auto &MF = *MI.getMF();
5133 AttributeList Attr = MF.getFunction().getAttributes();
5134 const auto &TLI = getTargetLowering();
5135 LLVMContext &Ctx = MF.getFunction().getContext();
5136 auto &DL = MF.getDataLayout();
5137 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5138 return false;
5139
5140 // Don't do this for minsize because the instruction sequence is usually
5141 // larger.
5142 if (MF.getFunction().hasMinSize())
5143 return false;
5144
5145 // Don't do this if the types are not going to be legal.
5146 if (LI) {
5147 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5148 return false;
5149 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5150 return false;
5152 {TargetOpcode::G_ICMP,
5153 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5154 DstTy}}))
5155 return false;
5156 }
5157
5158 auto CheckEltValue = [&](const Constant *C) {
5159 if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
5160 return !CI->isZero();
5161 return false;
5162 };
5163 return matchUnaryPredicate(MRI, RHS, CheckEltValue);
5164}
5165
5167 auto *NewMI = buildUDivUsingMul(MI);
5168 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5169}
5170
5172 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5173 Register Dst = MI.getOperand(0).getReg();
5174 Register RHS = MI.getOperand(2).getReg();
5175 LLT DstTy = MRI.getType(Dst);
5176
5177 auto &MF = *MI.getMF();
5178 AttributeList Attr = MF.getFunction().getAttributes();
5179 const auto &TLI = getTargetLowering();
5180 LLVMContext &Ctx = MF.getFunction().getContext();
5181 auto &DL = MF.getDataLayout();
5182 if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
5183 return false;
5184
5185 // Don't do this for minsize because the instruction sequence is usually
5186 // larger.
5187 if (MF.getFunction().hasMinSize())
5188 return false;
5189
5190 // If the sdiv has an 'exact' flag we can use a simpler lowering.
5191 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5192 return matchUnaryPredicate(
5193 MRI, RHS, [](const Constant *C) { return C && !C->isZeroValue(); });
5194 }
5195
5196 // Don't support the general case for now.
5197 return false;
5198}
5199
5200void CombinerHelper::applySDivByConst(MachineInstr &MI) {
5201 auto *NewMI = buildSDivUsingMul(MI);
5202 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5203}
5204
5205MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
5206 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5207 auto &SDiv = cast<GenericMachineInstr>(MI);
5208 Register Dst = SDiv.getReg(0);
5209 Register LHS = SDiv.getReg(1);
5210 Register RHS = SDiv.getReg(2);
5211 LLT Ty = MRI.getType(Dst);
5212 LLT ScalarTy = Ty.getScalarType();
5213 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5214 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5215 auto &MIB = Builder;
5217
5218 bool UseSRA = false;
5219 SmallVector<Register, 16> Shifts, Factors;
5220
5221 auto *RHSDef = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5222 bool IsSplat = getIConstantSplatVal(*RHSDef, MRI).has_value();
5223
5224 auto BuildSDIVPattern = [&](const Constant *C) {
5225 // Don't recompute inverses for each splat element.
5226 if (IsSplat && !Factors.empty()) {
5227 Shifts.push_back(Shifts[0]);
5228 Factors.push_back(Factors[0]);
5229 return true;
5230 }
5231
5232 auto *CI = cast<ConstantInt>(C);
5233 APInt Divisor = CI->getValue();
5234 unsigned Shift = Divisor.countr_zero();
5235 if (Shift) {
5236 Divisor.ashrInPlace(Shift);
5237 UseSRA = true;
5238 }
5239
5240 // Calculate the multiplicative inverse modulo BW.
5241 // 2^W requires W + 1 bits, so we have to extend and then truncate.
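// E.g. an exact sdiv by 6 at W == 32 becomes an arithmetic shift right by 1
// followed by a multiply by 0xAAAAAAAB, the inverse of 3 modulo 2^32
// (3 * 0xAAAAAAAB == 2^33 + 1).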
5242 unsigned W = Divisor.getBitWidth();
5243 APInt Factor = Divisor.zext(W + 1)
5244 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
5245 .trunc(W);
5246 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5247 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5248 return true;
5249 };
5250
5251 // Collect all magic values from the build vector.
5252 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5253 (void)Matched;
5254 assert(Matched && "Expected unary predicate match to succeed");
5255
5256 Register Shift, Factor;
5257 if (Ty.isVector()) {
5258 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5259 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5260 } else {
5261 Shift = Shifts[0];
5262 Factor = Factors[0];
5263 }
5264
5265 Register Res = LHS;
5266
5267 if (UseSRA)
5268 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5269
5270 return MIB.buildMul(Ty, Res, Factor);
5271}
5272
5273bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
5274 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
5275 Register RHS = MI.getOperand(2).getReg();
5276 Register Dst = MI.getOperand(0).getReg();
5277 LLT Ty = MRI.getType(Dst);
5278 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5279 auto MatchPow2ExceptOne = [&](const Constant *C) {
5280 if (auto *CI = dyn_cast<ConstantInt>(C))
5281 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
5282 return false;
5283 };
5284 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
5285 return false;
5286 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
5287}
5288
5289void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
5290 Register LHS = MI.getOperand(1).getReg();
5291 Register RHS = MI.getOperand(2).getReg();
5292 Register Dst = MI.getOperand(0).getReg();
5293 LLT Ty = MRI.getType(Dst);
5294 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
5295 unsigned NumEltBits = Ty.getScalarSizeInBits();
5296
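// umulh(x, 2^k) is (x * 2^k) >> EltBits, i.e. x >> (EltBits - k), so the
// shift amount below is EltBits - log2(RHS).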
5298 auto LogBase2 = buildLogBase2(RHS, Builder);
5299 auto ShiftAmt =
5300 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
5301 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
5302 Builder.buildLShr(Dst, LHS, Trunc);
5303 MI.eraseFromParent();
5304}
5305
5306bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
5307 BuildFnTy &MatchInfo) {
5308 unsigned Opc = MI.getOpcode();
5309 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
5310 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5311 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
5312
5313 Register Dst = MI.getOperand(0).getReg();
5314 Register X = MI.getOperand(1).getReg();
5315 Register Y = MI.getOperand(2).getReg();
5316 LLT Type = MRI.getType(Dst);
5317
5318 // fold (fadd x, fneg(y)) -> (fsub x, y)
5319 // fold (fadd fneg(y), x) -> (fsub x, y)
5320 // G_ADD is commutative so both cases are checked by m_GFAdd
5321 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5322 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
5323 Opc = TargetOpcode::G_FSUB;
5324 }
5325 /// fold (fsub x, fneg(y)) -> (fadd x, y)
5326 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
5327 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
5328 Opc = TargetOpcode::G_FADD;
5329 }
5330 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
5331 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
5332 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
5333 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
5334 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
5335 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
5336 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
5337 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
5338 // no opcode change
5339 } else
5340 return false;
5341
5342 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5343 Observer.changingInstr(MI);
5344 MI.setDesc(B.getTII().get(Opc));
5345 MI.getOperand(1).setReg(X);
5346 MI.getOperand(2).setReg(Y);
5347 Observer.changedInstr(MI);
5348 };
5349 return true;
5350}
5351
5352bool CombinerHelper::matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5353 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5354
5355 Register LHS = MI.getOperand(1).getReg();
5356 MatchInfo = MI.getOperand(2).getReg();
5357 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
5358
5359 const auto LHSCst = Ty.isVector()
5360 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
5361 : getFConstantVRegValWithLookThrough(LHS, MRI);
5362 if (!LHSCst)
5363 return false;
5364
5365 // -0.0 is always allowed
5366 if (LHSCst->Value.isNegZero())
5367 return true;
5368
5369 // +0.0 is only allowed if nsz is set.
5370 if (LHSCst->Value.isPosZero())
5371 return MI.getFlag(MachineInstr::FmNsz);
5372
5373 return false;
5374}
5375
5376void CombinerHelper::applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) {
5378 Register Dst = MI.getOperand(0).getReg();
5379 Builder.buildFNeg(
5380 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
5381 eraseInst(MI);
5382}
5383
5384/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
5385/// due to global flags or MachineInstr flags.
5386static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
5387 if (MI.getOpcode() != TargetOpcode::G_FMUL)
5388 return false;
5389 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
5390}
5391
5392static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
5393 const MachineRegisterInfo &MRI) {
5394 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
5395 MRI.use_instr_nodbg_end()) >
5396 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
5397 MRI.use_instr_nodbg_end());
5398}
5399
5400bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
5401 bool &AllowFusionGlobally,
5402 bool &HasFMAD, bool &Aggressive,
5403 bool CanReassociate) {
5404
5405 auto *MF = MI.getMF();
5406 const auto &TLI = *MF->getSubtarget().getTargetLowering();
5407 const TargetOptions &Options = MF->getTarget().Options;
5408 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5409
5410 if (CanReassociate &&
5411 !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
5412 return false;
5413
5414 // Floating-point multiply-add with intermediate rounding.
5415 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
5416 // Floating-point multiply-add without intermediate rounding.
5417 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
5418 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
5419 // No valid opcode, do not combine.
5420 if (!HasFMAD && !HasFMA)
5421 return false;
5422
5423 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
5424 Options.UnsafeFPMath || HasFMAD;
5425 // If the addition is not contractable, do not combine.
5426 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
5427 return false;
5428
5429 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
5430 return true;
5431}
5432
5433bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
5434 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5435 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5436
5437 bool AllowFusionGlobally, HasFMAD, Aggressive;
5438 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5439 return false;
5440
5441 Register Op1 = MI.getOperand(1).getReg();
5442 Register Op2 = MI.getOperand(2).getReg();
5443 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5444 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5445 unsigned PreferredFusedOpcode =
5446 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5447
5448 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5449 // prefer to fold the multiply with fewer uses.
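// (Fusing the multiply with fewer other uses makes it more likely that the
// original G_FMUL becomes dead after the combine.)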
5450 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5451 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5452 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5453 std::swap(LHS, RHS);
5454 }
5455
5456 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
5457 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5458 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
5459 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5460 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5461 {LHS.MI->getOperand(1).getReg(),
5462 LHS.MI->getOperand(2).getReg(), RHS.Reg});
5463 };
5464 return true;
5465 }
5466
5467 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
5468 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5469 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
5470 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5471 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5472 {RHS.MI->getOperand(1).getReg(),
5473 RHS.MI->getOperand(2).getReg(), LHS.Reg});
5474 };
5475 return true;
5476 }
5477
5478 return false;
5479}
5480
5481bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
5482 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5483 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5484
5485 bool AllowFusionGlobally, HasFMAD, Aggressive;
5486 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5487 return false;
5488
5489 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5490 Register Op1 = MI.getOperand(1).getReg();
5491 Register Op2 = MI.getOperand(2).getReg();
5492 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5493 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5494 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5495
5496 unsigned PreferredFusedOpcode =
5497 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5498
5499 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5500 // prefer to fold the multiply with fewer uses.
5501 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5502 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5503 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5504 std::swap(LHS, RHS);
5505 }
5506
5507 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
5508 MachineInstr *FpExtSrc;
5509 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5510 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5511 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5512 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5513 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5514 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5515 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5516 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5517 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
5518 };
5519 return true;
5520 }
5521
5522 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
5523 // Note: Commutes FADD operands.
5524 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
5525 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
5526 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5527 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
5528 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5529 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
5530 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
5531 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5532 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
5533 };
5534 return true;
5535 }
5536
5537 return false;
5538}
5539
5540bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
5541 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5542 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5543
5544 bool AllowFusionGlobally, HasFMAD, Aggressive;
5545 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
5546 return false;
5547
5548 Register Op1 = MI.getOperand(1).getReg();
5549 Register Op2 = MI.getOperand(2).getReg();
5550 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5551 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5552 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5553
5554 unsigned PreferredFusedOpcode =
5555 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5556
5557 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5558 // prefer to fold the multiply with fewer uses.
5559 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5560 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5561 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5562 std::swap(LHS, RHS);
5563 }
5564
5565 MachineInstr *FMA = nullptr;
5566 Register Z;
5567 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
5568 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5569 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
5570 TargetOpcode::G_FMUL) &&
5571 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
5572 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
5573 FMA = LHS.MI;
5574 Z = RHS.Reg;
5575 }
5576 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
5577 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5578 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
5579 TargetOpcode::G_FMUL) &&
5580 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
5581 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
5582 Z = LHS.Reg;
5583 FMA = RHS.MI;
5584 }
5585
5586 if (FMA) {
5587 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
5588 Register X = FMA->getOperand(1).getReg();
5589 Register Y = FMA->getOperand(2).getReg();
5590 Register U = FMulMI->getOperand(1).getReg();
5591 Register V = FMulMI->getOperand(2).getReg();
5592
5593 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5594 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
5595 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
5596 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5597 {X, Y, InnerFMA});
5598 };
5599 return true;
5600 }
5601
5602 return false;
5603}
5604
5605bool CombinerHelper::matchCombineFAddFpExtFMAFMulToFMadOrFMAAggressive(
5606 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5607 assert(MI.getOpcode() == TargetOpcode::G_FADD);
5608
5609 bool AllowFusionGlobally, HasFMAD, Aggressive;
5610 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5611 return false;
5612
5613 if (!Aggressive)
5614 return false;
5615
5616 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5617 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
5618 Register Op1 = MI.getOperand(1).getReg();
5619 Register Op2 = MI.getOperand(2).getReg();
5620 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5621 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5622
5623 unsigned PreferredFusedOpcode =
5624 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5625
5626 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5627 // prefer to fold the multiply with fewer uses.
5628 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5629 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
5630 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5631 std::swap(LHS, RHS);
5632 }
5633
5634 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
5635 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
5636 Register Y, MachineIRBuilder &B) {
5637 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
5638 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
5639 Register InnerFMA =
5640 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
5641 .getReg(0);
5642 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5643 {X, Y, InnerFMA});
5644 };
5645
5646 MachineInstr *FMulMI, *FMAMI;
5647 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
5648 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5649 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
5650 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
5651 m_GFPExt(m_MInstr(FMulMI))) &&
5652 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5653 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5654 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5655 MatchInfo = [=](MachineIRBuilder &B) {
5656 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5657 FMulMI->getOperand(2).getReg(), RHS.Reg,
5658 LHS.MI->getOperand(1).getReg(),
5659 LHS.MI->getOperand(2).getReg(), B);
5660 };
5661 return true;
5662 }
5663
5664 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
5665 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5666 // FIXME: This turns two single-precision and one double-precision
5667 // operation into two double-precision operations, which might not be
5668 // interesting for all targets, especially GPUs.
5669 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5670 FMAMI->getOpcode() == PreferredFusedOpcode) {
5671 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5672 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5673 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5674 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5675 MatchInfo = [=](MachineIRBuilder &B) {
5676 Register X = FMAMI->getOperand(1).getReg();
5677 Register Y = FMAMI->getOperand(2).getReg();
5678 X = B.buildFPExt(DstType, X).getReg(0);
5679 Y = B.buildFPExt(DstType, Y).getReg(0);
5680 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5681 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
5682 };
5683
5684 return true;
5685 }
5686 }
5687
5688 // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
5689 // -> (fma x, y, (fma (fpext u), (fpext v), z))
5690 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
5691 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
5692 m_GFPExt(m_MInstr(FMulMI))) &&
5693 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5694 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5695 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5696 MatchInfo = [=](MachineIRBuilder &B) {
5697 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5698 FMulMI->getOperand(2).getReg(), LHS.Reg,
5699 RHS.MI->getOperand(1).getReg(),
5700 RHS.MI->getOperand(2).getReg(), B);
5701 };
5702 return true;
5703 }
5704
5705 // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
5706 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
5707 // FIXME: This turns two single-precision and one double-precision
5708 // operation into two double-precision operations, which might not be
5709 // interesting for all targets, especially GPUs.
5710 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
5711 FMAMI->getOpcode() == PreferredFusedOpcode) {
5712 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
5713 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5714 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
5715 MRI.getType(FMAMI->getOperand(0).getReg()))) {
5716 MatchInfo = [=](MachineIRBuilder &B) {
5717 Register X = FMAMI->getOperand(1).getReg();
5718 Register Y = FMAMI->getOperand(2).getReg();
5719 X = B.buildFPExt(DstType, X).getReg(0);
5720 Y = B.buildFPExt(DstType, Y).getReg(0);
5721 buildMatchInfo(FMulMI->getOperand(1).getReg(),
5722 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
5723 };
5724 return true;
5725 }
5726 }
5727
5728 return false;
5729}
5730
5731bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
5732 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5733 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5734
5735 bool AllowFusionGlobally, HasFMAD, Aggressive;
5736 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5737 return false;
5738
5739 Register Op1 = MI.getOperand(1).getReg();
5740 Register Op2 = MI.getOperand(2).getReg();
5741 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
5742 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
5743 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5744
5745 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
5746 // prefer to fold the multiply with fewer uses.
5747 int FirstMulHasFewerUses = true;
5748 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5749 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5750 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
5751 FirstMulHasFewerUses = false;
5752
5753 unsigned PreferredFusedOpcode =
5754 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5755
5756 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
5757 if (FirstMulHasFewerUses &&
5758 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
5759 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
5760 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5761 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
5762 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5763 {LHS.MI->getOperand(1).getReg(),
5764 LHS.MI->getOperand(2).getReg(), NegZ});
5765 };
5766 return true;
5767 }
5768 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
5769 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
5770 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
5771 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5772 Register NegY =
5773 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
5774 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5775 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
5776 };
5777 return true;
5778 }
5779
5780 return false;
5781}
5782
5783bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
5784 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5785 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5786
5787 bool AllowFusionGlobally, HasFMAD, Aggressive;
5788 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5789 return false;
5790
5791 Register LHSReg = MI.getOperand(1).getReg();
5792 Register RHSReg = MI.getOperand(2).getReg();
5793 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5794
5795 unsigned PreferredFusedOpcode =
5796 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5797
5798 MachineInstr *FMulMI;
5799 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
5800 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5801 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
5802 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5803 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5804 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5805 Register NegX =
5806 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5807 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5808 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5809 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
5810 };
5811 return true;
5812 }
5813
5814 // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
5815 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
5816 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
5817 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
5818 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
5819 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5820 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5821 {FMulMI->getOperand(1).getReg(),
5822 FMulMI->getOperand(2).getReg(), LHSReg});
5823 };
5824 return true;
5825 }
5826
5827 return false;
5828}
5829
5830bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
5831 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5832 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5833
5834 bool AllowFusionGlobally, HasFMAD, Aggressive;
5835 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5836 return false;
5837
5838 Register LHSReg = MI.getOperand(1).getReg();
5839 Register RHSReg = MI.getOperand(2).getReg();
5840 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5841
5842 unsigned PreferredFusedOpcode =
5843 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5844
5845 MachineInstr *FMulMI;
5846 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
5847 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5848 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5849 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
5850 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5851 Register FpExtX =
5852 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5853 Register FpExtY =
5854 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5855 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
5856 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5857 {FpExtX, FpExtY, NegZ});
5858 };
5859 return true;
5860 }
5861
5862 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
5863 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
5864 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5865 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
5866 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5867 Register FpExtY =
5868 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
5869 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
5870 Register FpExtZ =
5871 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
5872 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
5873 {NegY, FpExtZ, LHSReg});
5874 };
5875 return true;
5876 }
5877
5878 return false;
5879}
5880
5881bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
5882 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
5883 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
5884
5885 bool AllowFusionGlobally, HasFMAD, Aggressive;
5886 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
5887 return false;
5888
5889 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
5890 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5891 Register LHSReg = MI.getOperand(1).getReg();
5892 Register RHSReg = MI.getOperand(2).getReg();
5893
5894 unsigned PreferredFusedOpcode =
5895 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
5896
5897 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
5898 MachineIRBuilder &B) {
5899 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
5900 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
5901 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
5902 };
5903
5904 MachineInstr *FMulMI;
5905 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
5906 // (fneg (fma (fpext x), (fpext y), z))
5907 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
5908 // (fneg (fma (fpext x), (fpext y), z))
5909 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
5910 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
5911 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5912 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
5913 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5914 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5915 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
5916 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
5917 FMulMI->getOperand(2).getReg(), RHSReg, B);
5918 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
5919 };
5920 return true;
5921 }
5922
5923 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
5924 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
5925 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
5926 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
5927 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
5928 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
5929 MRI.getType(FMulMI->getOperand(0).getReg()))) {
5930 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5931 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
5932 FMulMI->getOperand(2).getReg(), LHSReg, B);
5933 };
5934 return true;
5935 }
5936
5937 return false;
5938}
5939
5940bool CombinerHelper::matchCombineFMinMaxNaN(MachineInstr &MI,
5941 unsigned &IdxToPropagate) {
5942 bool PropagateNaN;
5943 switch (MI.getOpcode()) {
5944 default:
5945 return false;
5946 case TargetOpcode::G_FMINNUM:
5947 case TargetOpcode::G_FMAXNUM:
5948 PropagateNaN = false;
5949 break;
5950 case TargetOpcode::G_FMINIMUM:
5951 case TargetOpcode::G_FMAXIMUM:
5952 PropagateNaN = true;
5953 break;
5954 }
5955
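// G_FMINNUM/G_FMAXNUM return the non-NaN operand when exactly one input is a
// NaN, while G_FMINIMUM/G_FMAXIMUM propagate the NaN, so pick which operand
// survives accordingly.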
5956 auto MatchNaN = [&](unsigned Idx) {
5957 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
5958 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
5959 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
5960 return false;
5961 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
5962 return true;
5963 };
5964
5965 return MatchNaN(1) || MatchNaN(2);
5966}
5967
5968bool CombinerHelper::matchAddSubSameReg(MachineInstr &MI, Register &Src) {
5969 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
5970 Register LHS = MI.getOperand(1).getReg();
5971 Register RHS = MI.getOperand(2).getReg();
5972
5973 // Helper lambda to check for opportunities for
5974 // A + (B - A) -> B
5975 // (B - A) + A -> B
5976 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
5977 Register Reg;
5978 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
5979 Reg == MaybeSameReg;
5980 };
5981 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
5982}
5983
5984bool CombinerHelper::matchBuildVectorIdentityFold(MachineInstr &MI,
5985 Register &MatchInfo) {
5986 // This combine folds the following patterns:
5987 //
5988 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
5989 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
5990 // into
5991 // x
5992 // if
5993 // k == sizeof(VecEltTy)/2
5994 // type(x) == type(dst)
5995 //
5996 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
5997 // into
5998 // x
5999 // if
6000 // type(x) == type(dst)
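// E.g. with x : <2 x s32>, the G_BITCAST is an s64 made of both lanes; the
// plain bitcast supplies one 32-bit half and the G_LSHR by 32 (the destination
// element width) supplies the other, so the build_vector(_trunc) merely
// reassembles x.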
6001
6002 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6003 LLT DstEltTy = DstVecTy.getElementType();
6004
6005 Register Lo, Hi;
6006
6007 if (mi_match(
6008 MI, MRI,
6009 m_GBuildVector(m_GTrunc(m_GBitcast(m_Reg(Lo))), m_GImplicitDef()))) {
6010 MatchInfo = Lo;
6011 return MRI.getType(MatchInfo) == DstVecTy;
6012 }
6013
6014 std::optional<ValueAndVReg> ShiftAmount;
6015 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6016 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6017 if (mi_match(
6018 MI, MRI,
6019 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6020 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6021 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6022 MatchInfo = Lo;
6023 return MRI.getType(MatchInfo) == DstVecTy;
6024 }
6025 }
6026
6027 return false;
6028}
6029
6030bool CombinerHelper::matchTruncBuildVectorFold(MachineInstr &MI,
6031 Register &MatchInfo) {
6032 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6033 // if type(x) == type(G_TRUNC)
6034 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6035 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6036 return false;
6037
6038 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6039}
6040
6041bool CombinerHelper::matchTruncLshrBuildVectorFold(MachineInstr &MI,
6042 Register &MatchInfo) {
6043 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6044 // y if K == size of vector element type
6045 std::optional<ValueAndVReg> ShiftAmt;
6046 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6047 m_GLShr(m_GBitcast(m_GBuildVector(m_Reg(), m_Reg(MatchInfo))),
6048 m_GCst(ShiftAmt))))
6049 return false;
6050
6051 LLT MatchTy = MRI.getType(MatchInfo);
6052 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6053 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6054}
6055
6056unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6057 CmpInst::Predicate Pred, LLT DstTy,
6058 SelectPatternNaNBehaviour VsNaNRetVal) const {
6059 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6060 "Expected a NaN behaviour?");
6061 // Choose an opcode based off of legality or the behaviour when one of the
6062 // LHS/RHS may be NaN.
6063 switch (Pred) {
6064 default:
6065 return 0;
6066 case CmpInst::FCMP_UGT:
6067 case CmpInst::FCMP_UGE:
6068 case CmpInst::FCMP_OGT:
6069 case CmpInst::FCMP_OGE:
6070 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6071 return TargetOpcode::G_FMAXNUM;
6072 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6073 return TargetOpcode::G_FMAXIMUM;
6074 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6075 return TargetOpcode::G_FMAXNUM;
6076 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6077 return TargetOpcode::G_FMAXIMUM;
6078 return 0;
6079 case CmpInst::FCMP_ULT:
6080 case CmpInst::FCMP_ULE:
6081 case CmpInst::FCMP_OLT:
6082 case CmpInst::FCMP_OLE:
6083 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6084 return TargetOpcode::G_FMINNUM;
6085 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6086 return TargetOpcode::G_FMINIMUM;
6087 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6088 return TargetOpcode::G_FMINNUM;
6089 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
6090 return 0;
6091 return TargetOpcode::G_FMINIMUM;
6092 }
6093}
6094
6095CombinerHelper::SelectPatternNaNBehaviour
6096CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
6097 bool IsOrderedComparison) const {
6098 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
6099 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
6100 // Completely unsafe.
6101 if (!LHSSafe && !RHSSafe)
6102 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
6103 if (LHSSafe && RHSSafe)
6104 return SelectPatternNaNBehaviour::RETURNS_ANY;
6105 // An ordered comparison will return false when given a NaN, so it
6106 // returns the RHS.
6107 if (IsOrderedComparison)
6108 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
6109 : SelectPatternNaNBehaviour::RETURNS_OTHER;
6110 // An unordered comparison will return true when given a NaN, so it
6111 // returns the LHS.
6112 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
6113 : SelectPatternNaNBehaviour::RETURNS_NAN;
6114}
6115
6116bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
6117 Register TrueVal, Register FalseVal,
6118 BuildFnTy &MatchInfo) {
6119 // Match: select (fcmp cond x, y) x, y
6120 // select (fcmp cond x, y) y, x
6121 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
6122 LLT DstTy = MRI.getType(Dst);
6123 // Bail out early on pointers, since we'll never want to fold to a min/max.
6124 if (DstTy.isPointer())
6125 return false;
6126 // Match a floating point compare with a less-than/greater-than predicate.
6127 // TODO: Allow multiple users of the compare if they are all selects.
6128 CmpInst::Predicate Pred;
6129 Register CmpLHS, CmpRHS;
6130 if (!mi_match(Cond, MRI,
6131 m_OneNonDBGUse(
6132 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
6133 CmpInst::isEquality(Pred))
6134 return false;
6135 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
6136 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
6137 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
6138 return false;
6139 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
6140 std::swap(CmpLHS, CmpRHS);
6141 Pred = CmpInst::getSwappedPredicate(Pred);
6142 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
6143 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
6144 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
6145 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
6146 }
6147 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
6148 return false;
6149 // Decide what type of max/min this should be based off of the predicate.
6150 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
6151 if (!Opc || !isLegal({Opc, {DstTy}}))
6152 return false;
6153 // Comparisons between signed zero and zero may have different results...
6154 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
6155 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
6156 // We don't know if a comparison between two 0s will give us a consistent
6157 // result. Be conservative and only proceed if at least one side is
6158 // non-zero.
6159 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
6160 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
6161 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
6162 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
6163 return false;
6164 }
6165 }
6166 MatchInfo = [=](MachineIRBuilder &B) {
6167 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
6168 };
6169 return true;
6170}
6171
6172bool CombinerHelper::matchSimplifySelectToMinMax(MachineInstr &MI,
6173 BuildFnTy &MatchInfo) {
6174 // TODO: Handle integer cases.
6175 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
6176 // Condition may be fed by a truncated compare.
6177 Register Cond = MI.getOperand(1).getReg();
6178 Register MaybeTrunc;
6179 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
6180 Cond = MaybeTrunc;
6181 Register Dst = MI.getOperand(0).getReg();
6182 Register TrueVal = MI.getOperand(2).getReg();
6183 Register FalseVal = MI.getOperand(3).getReg();
6184 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
6185}
6186
6187bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
6188 BuildFnTy &MatchInfo) {
6189 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
6190 // (X + Y) == X --> Y == 0
6191 // (X + Y) != X --> Y != 0
6192 // (X - Y) == X --> Y == 0
6193 // (X - Y) != X --> Y != 0
6194 // (X ^ Y) == X --> Y == 0
6195 // (X ^ Y) != X --> Y != 0
6196 Register Dst = MI.getOperand(0).getReg();
6197 CmpInst::Predicate Pred;
6198 Register X, Y, OpLHS, OpRHS;
6199 bool MatchedSub = mi_match(
6200 Dst, MRI,
6201 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
6202 if (MatchedSub && X != OpLHS)
6203 return false;
6204 if (!MatchedSub) {
6205 if (!mi_match(Dst, MRI,
6206 m_c_GICmp(m_Pred(Pred), m_Reg(X),
6207 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
6208 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
6209 return false;
6210 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
6211 }
6212 MatchInfo = [=](MachineIRBuilder &B) {
6213 auto Zero = B.buildConstant(MRI.getType(Y), 0);
6214 B.buildICmp(Pred, Dst, Y, Zero);
6215 };
6216 return CmpInst::isEquality(Pred) && Y.isValid();
6217}
6218
6219bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
6220 Register ShiftReg = MI.getOperand(2).getReg();
6221 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
6222 auto IsShiftTooBig = [&](const Constant *C) {
6223 auto *CI = dyn_cast<ConstantInt>(C);
6224 return CI && CI->uge(ResTy.getScalarSizeInBits());
6225 };
6226 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
6227}
6228
6229bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
6230 Register LHS = MI.getOperand(1).getReg();
6231 Register RHS = MI.getOperand(2).getReg();
6232 auto *LHSDef = MRI.getVRegDef(LHS);
6233 if (getIConstantVRegVal(LHS, MRI).has_value())
6234 return true;
6235
6236 // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute
6237 // as long as we don't already have a constant on the RHS.
6238 if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER)
6239 return false;
6240 return MRI.getVRegDef(RHS)->getOpcode() !=
6241 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
6242 !getIConstantVRegVal(RHS, MRI).has_value();
6243}
6244
6245bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6246 Register LHS = MI.getOperand(1).getReg();
6247 Register RHS = MI.getOperand(2).getReg();
6248 std::optional<FPValueAndVReg> ValAndVReg;
6249 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6250 return false;
6251 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6252}
6253
6254void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6255 Observer.changingInstr(MI);
6256 Register LHSReg = MI.getOperand(1).getReg();
6257 Register RHSReg = MI.getOperand(2).getReg();
6258 MI.getOperand(1).setReg(RHSReg);
6259 MI.getOperand(2).setReg(LHSReg);
6260 Observer.changedInstr(MI);
6261}
6262
6263bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) {
6264 LLT SrcTy = MRI.getType(Src);
6265 if (SrcTy.isFixedVector())
6266 return isConstantSplatVector(Src, 1, AllowUndefs);
6267 if (SrcTy.isScalar()) {
6268 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6269 return true;
6270 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6271 return IConstant && IConstant->Value == 1;
6272 }
6273 return false; // scalable vector
6274}
6275
6276bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) {
6277 LLT SrcTy = MRI.getType(Src);
6278 if (SrcTy.isFixedVector())
6279 return isConstantSplatVector(Src, 0, AllowUndefs);
6280 if (SrcTy.isScalar()) {
6281 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
6282 return true;
6283 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6284 return IConstant && IConstant->Value == 0;
6285 }
6286 return false; // scalable vector
6287}
6288
6289// Ignores COPYs during conformance checks.
6290// FIXME scalable vectors.
6291bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
6292 bool AllowUndefs) {
6293 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6294 if (!BuildVector)
6295 return false;
6296 unsigned NumSources = BuildVector->getNumSources();
6297
6298 for (unsigned I = 0; I < NumSources; ++I) {
6299 GImplicitDef *ImplicitDef =
6300 getOpcodeDef<GImplicitDef>(BuildVector->getSourceReg(I), MRI);
6301 if (ImplicitDef && AllowUndefs)
6302 continue;
6303 if (ImplicitDef && !AllowUndefs)
6304 return false;
6305 std::optional<ValueAndVReg> IConstant =
6306 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6307 if (IConstant && IConstant->Value == SplatValue)
6308 continue;
6309 return false;
6310 }
6311 return true;
6312}
6313
6314// Ignores COPYs during lookups.
6315// FIXME scalable vectors
6316std::optional<APInt>
6317CombinerHelper::getConstantOrConstantSplatVector(Register Src) {
6318 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6319 if (IConstant)
6320 return IConstant->Value;
6321
6322 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6323 if (!BuildVector)
6324 return std::nullopt;
6325 unsigned NumSources = BuildVector->getNumSources();
6326
6327 std::optional<APInt> Value = std::nullopt;
6328 for (unsigned I = 0; I < NumSources; ++I) {
6329 std::optional<ValueAndVReg> IConstant =
6330 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6331 if (!IConstant)
6332 return std::nullopt;
6333 if (!Value)
6334 Value = IConstant->Value;
6335 else if (*Value != IConstant->Value)
6336 return std::nullopt;
6337 }
6338 return Value;
6339}
6340
6341// FIXME G_SPLAT_VECTOR
6342bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
6343 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
6344 if (IConstant)
6345 return true;
6346
6347 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
6348 if (!BuildVector)
6349 return false;
6350
6351 unsigned NumSources = BuildVector->getNumSources();
6352 for (unsigned I = 0; I < NumSources; ++I) {
6353 std::optional<ValueAndVReg> IConstant =
6354 getIConstantVRegValWithLookThrough(BuildVector->getSourceReg(I), MRI);
6355 if (!IConstant)
6356 return false;
6357 }
6358 return true;
6359}
6360
6361// TODO: use knownbits to determine zeros
6362bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
6363 BuildFnTy &MatchInfo) {
6364 uint32_t Flags = Select->getFlags();
6365 Register Dest = Select->getReg(0);
6366 Register Cond = Select->getCondReg();
6367 Register True = Select->getTrueReg();
6368 Register False = Select->getFalseReg();
6369 LLT CondTy = MRI.getType(Select->getCondReg());
6370 LLT TrueTy = MRI.getType(Select->getTrueReg());
6371
6372 // We only do this combine for scalar boolean conditions.
6373 if (CondTy != LLT::scalar(1))
6374 return false;
6375
6376 if (TrueTy.isPointer())
6377 return false;
6378
6379 // Both are scalars.
6380 std::optional<ValueAndVReg> TrueOpt =
6381 getIConstantVRegValWithLookThrough(True, MRI);
6382 std::optional<ValueAndVReg> FalseOpt =
6383 getIConstantVRegValWithLookThrough(False, MRI);
6384
6385 if (!TrueOpt || !FalseOpt)
6386 return false;
6387
6388 APInt TrueValue = TrueOpt->Value;
6389 APInt FalseValue = FalseOpt->Value;
6390
6391 // select Cond, 1, 0 --> zext (Cond)
6392 if (TrueValue.isOne() && FalseValue.isZero()) {
6393 MatchInfo = [=](MachineIRBuilder &B) {
6394 B.setInstrAndDebugLoc(*Select);
6395 B.buildZExtOrTrunc(Dest, Cond);
6396 };
6397 return true;
6398 }
6399
6400 // select Cond, -1, 0 --> sext (Cond)
6401 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
6402 MatchInfo = [=](MachineIRBuilder &B) {
6403 B.setInstrAndDebugLoc(*Select);
6404 B.buildSExtOrTrunc(Dest, Cond);
6405 };
6406 return true;
6407 }
6408
6409 // select Cond, 0, 1 --> zext (!Cond)
6410 if (TrueValue.isZero() && FalseValue.isOne()) {
6411 MatchInfo = [=](MachineIRBuilder &B) {
6412 B.setInstrAndDebugLoc(*Select);
6413 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6414 B.buildNot(Inner, Cond);
6415 B.buildZExtOrTrunc(Dest, Inner);
6416 };
6417 return true;
6418 }
6419
6420 // select Cond, 0, -1 --> sext (!Cond)
6421 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
6422 MatchInfo = [=](MachineIRBuilder &B) {
6423 B.setInstrAndDebugLoc(*Select);
6424 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6425 B.buildNot(Inner, Cond);
6426 B.buildSExtOrTrunc(Dest, Inner);
6427 };
6428 return true;
6429 }
6430
6431 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
6432 if (TrueValue - 1 == FalseValue) {
6433 MatchInfo = [=](MachineIRBuilder &B) {
6434 B.setInstrAndDebugLoc(*Select);
6435 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6436 B.buildZExtOrTrunc(Inner, Cond);
6437 B.buildAdd(Dest, Inner, False);
6438 };
6439 return true;
6440 }
6441
6442 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
6443 if (TrueValue + 1 == FalseValue) {
6444 MatchInfo = [=](MachineIRBuilder &B) {
6445 B.setInstrAndDebugLoc(*Select);
6446 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6447 B.buildSExtOrTrunc(Inner, Cond);
6448 B.buildAdd(Dest, Inner, False);
6449 };
6450 return true;
6451 }
6452
6453 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
6454 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
6455 MatchInfo = [=](MachineIRBuilder &B) {
6456 B.setInstrAndDebugLoc(*Select);
6457 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6458 B.buildZExtOrTrunc(Inner, Cond);
6459 // The shift amount must be scalar.
6460 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
6461 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
6462 B.buildShl(Dest, Inner, ShAmtC, Flags);
6463 };
6464 return true;
6465 }
6466 // select Cond, -1, C --> or (sext Cond), C
6467 if (TrueValue.isAllOnes()) {
6468 MatchInfo = [=](MachineIRBuilder &B) {
6469 B.setInstrAndDebugLoc(*Select);
6470 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6471 B.buildSExtOrTrunc(Inner, Cond);
6472 B.buildOr(Dest, Inner, False, Flags);
6473 };
6474 return true;
6475 }
6476
6477 // select Cond, C, -1 --> or (sext (not Cond)), C
6478 if (FalseValue.isAllOnes()) {
6479 MatchInfo = [=](MachineIRBuilder &B) {
6480 B.setInstrAndDebugLoc(*Select);
6481 Register Not = MRI.createGenericVirtualRegister(CondTy);
6482 B.buildNot(Not, Cond);
6483 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
6484 B.buildSExtOrTrunc(Inner, Not);
6485 B.buildOr(Dest, Inner, True, Flags);
6486 };
6487 return true;
6488 }
6489
6490 return false;
6491}
6492
6493// TODO: use knownbits to determine zeros
6494bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
6495 BuildFnTy &MatchInfo) {
6496 uint32_t Flags = Select->getFlags();
6497 Register DstReg = Select->getReg(0);
6498 Register Cond = Select->getCondReg();
6499 Register True = Select->getTrueReg();
6500 Register False = Select->getFalseReg();
6501 LLT CondTy = MRI.getType(Select->getCondReg());
6502 LLT TrueTy = MRI.getType(Select->getTrueReg());
6503
6504 // Boolean or fixed vector of booleans.
6505 if (CondTy.isScalableVector() ||
6506 (CondTy.isFixedVector() &&
6507 CondTy.getElementType().getScalarSizeInBits() != 1) ||
6508 CondTy.getScalarSizeInBits() != 1)
6509 return false;
6510
6511 if (CondTy != TrueTy)
6512 return false;
6513
6514 // select Cond, Cond, F --> or Cond, F
6515 // select Cond, 1, F --> or Cond, F
6516 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
6517 MatchInfo = [=](MachineIRBuilder &B) {
6518 B.setInstrAndDebugLoc(*Select);
6519 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6520 B.buildZExtOrTrunc(Ext, Cond);
6521 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6522 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
6523 };
6524 return true;
6525 }
6526
6527 // select Cond, T, Cond --> and Cond, T
6528 // select Cond, T, 0 --> and Cond, T
6529 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
6530 MatchInfo = [=](MachineIRBuilder &B) {
6531 B.setInstrAndDebugLoc(*Select);
6532 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6533 B.buildZExtOrTrunc(Ext, Cond);
6534 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6535 B.buildAnd(DstReg, Ext, FreezeTrue);
6536 };
6537 return true;
6538 }
6539
6540 // select Cond, T, 1 --> or (not Cond), T
6541 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
6542 MatchInfo = [=](MachineIRBuilder &B) {
6543 B.setInstrAndDebugLoc(*Select);
6544 // First the not.
6545 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6546 B.buildNot(Inner, Cond);
6547 // Then an ext to match the destination register.
6548 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6549 B.buildZExtOrTrunc(Ext, Inner);
6550 auto FreezeTrue = B.buildFreeze(TrueTy, True);
6551 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
6552 };
6553 return true;
6554 }
6555
6556 // select Cond, 0, F --> and (not Cond), F
6557 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
6558 MatchInfo = [=](MachineIRBuilder &B) {
6559 B.setInstrAndDebugLoc(*Select);
6560 // First the not.
6561 Register Inner = MRI.createGenericVirtualRegister(CondTy);
6562 B.buildNot(Inner, Cond);
6563 // Then an ext to match the destination register.
6564 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
6565 B.buildZExtOrTrunc(Ext, Inner);
6566 auto FreezeFalse = B.buildFreeze(TrueTy, False);
6567 B.buildAnd(DstReg, Ext, FreezeFalse);
6568 };
6569 return true;
6570 }
6571
6572 return false;
6573}
6574
6575bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
6576 BuildFnTy &MatchInfo) {
6577 Register DstReg = Select->getReg(0);
6578 Register Cond = Select->getCondReg();
6579 Register True = Select->getTrueReg();
6580 Register False = Select->getFalseReg();
6581 LLT DstTy = MRI.getType(DstReg);
6582
6583 if (DstTy.isPointer())
6584 return false;
6585
6586 // We need an G_ICMP on the condition register.
6587 GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
6588 if (!Cmp)
6589 return false;
6590
6591 // We want to fold the icmp and replace the select.
6592 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
6593 return false;
6594
6595 CmpInst::Predicate Pred = Cmp->getCond();
6596 // We need a larger or smaller predicate for
6597 // canonicalization.
6598 if (CmpInst::isEquality(Pred))
6599 return false;
6600
6601 Register CmpLHS = Cmp->getLHSReg();
6602 Register CmpRHS = Cmp->getRHSReg();
6603
6604 // We can swap CmpLHS and CmpRHS for higher hitrate.
6605 if (True == CmpRHS && False == CmpLHS) {
6606 std::swap(CmpLHS, CmpRHS);
6607 Pred = CmpInst::getSwappedPredicate(Pred);
6608 }
6609
6610 // (icmp X, Y) ? X : Y -> integer minmax.
6611 // see matchSelectPattern in ValueTracking.
6612 // Legality between G_SELECT and integer minmax can differ.
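// E.g. (icmp ugt X, Y) ? X : Y is exactly G_UMAX X, Y; the signed and
// min-flavoured cases below follow the same shape.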
6613 if (True == CmpLHS && False == CmpRHS) {
6614 switch (Pred) {
6615 case ICmpInst::ICMP_UGT:
6616 case ICmpInst::ICMP_UGE: {
6617 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
6618 return false;
6619 MatchInfo = [=](MachineIRBuilder &B) {
6620 B.buildUMax(DstReg, True, False);
6621 };
6622 return true;
6623 }
6624 case ICmpInst::ICMP_SGT:
6625 case ICmpInst::ICMP_SGE: {
6626 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
6627 return false;
6628 MatchInfo = [=](MachineIRBuilder &B) {
6629 B.buildSMax(DstReg, True, False);
6630 };
6631 return true;
6632 }
6633 case ICmpInst::ICMP_ULT:
6634 case ICmpInst::ICMP_ULE: {
6635 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
6636 return false;
6637 MatchInfo = [=](MachineIRBuilder &B) {
6638 B.buildUMin(DstReg, True, False);
6639 };
6640 return true;
6641 }
6642 case ICmpInst::ICMP_SLT:
6643 case ICmpInst::ICMP_SLE: {
6644 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
6645 return false;
6646 MatchInfo = [=](MachineIRBuilder &B) {
6647 B.buildSMin(DstReg, True, False);
6648 };
6649 return true;
6650 }
6651 default:
6652 return false;
6653 }
6654 }
6655
6656 return false;
6657}
6658
6659bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
6660 GSelect *Select = cast<GSelect>(&MI);
6661
6662 if (tryFoldSelectOfConstants(Select, MatchInfo))
6663 return true;
6664
6665 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
6666 return true;
6667
6668 if (tryFoldSelectToIntMinMax(Select, MatchInfo))
6669 return true;
6670
6671 return false;
6672}
6673
6674/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
6675/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
6676/// into a single comparison using range-based reasoning.
6677/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
6678bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(GLogicalBinOp *Logic,
6679 BuildFnTy &MatchInfo) {
6680 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6681 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6682 Register DstReg = Logic->getReg(0);
6683 Register LHS = Logic->getLHSReg();
6684 Register RHS = Logic->getRHSReg();
6685 unsigned Flags = Logic->getFlags();
6686
6687 // We need an G_ICMP on the LHS register.
6688 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
6689 if (!Cmp1)
6690 return false;
6691
6692 // We need an G_ICMP on the RHS register.
6693 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
6694 if (!Cmp2)
6695 return false;
6696
6697 // We want to fold the icmps.
6698 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6699 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
6700 return false;
6701
6702 APInt C1;
6703 APInt C2;
6704 std::optional<ValueAndVReg> MaybeC1 =
6705 getIConstantVRegValWithLookThrough(Cmp1->getRHSReg(), MRI);
6706 if (!MaybeC1)
6707 return false;
6708 C1 = MaybeC1->Value;
6709
6710 std::optional<ValueAndVReg> MaybeC2 =
6711 getIConstantVRegValWithLookThrough(Cmp2->getRHSReg(), MRI);
6712 if (!MaybeC2)
6713 return false;
6714 C2 = MaybeC2->Value;
6715
6716 Register R1 = Cmp1->getLHSReg();
6717 Register R2 = Cmp2->getLHSReg();
6718 CmpInst::Predicate Pred1 = Cmp1->getCond();
6719 CmpInst::Predicate Pred2 = Cmp2->getCond();
6720 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6721 LLT CmpOperandTy = MRI.getType(R1);
6722
6723 if (CmpOperandTy.isPointer())
6724 return false;
6725
6726 // We build ands, adds, and constants of type CmpOperandTy.
6727 // They must be legal to build.
6728 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
6729 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
6730 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
6731 return false;
6732
6733 // Look through add of a constant offset on R1, R2, or both operands. This
6734 // allows us to interpret the R + C' < C'' range idiom into a proper range.
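// E.g. (x + 5) u< 10 constrains x + 5 to [0, 10); subtracting the offset 5
// below turns that into the equivalent range on x itself.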
6735 std::optional<APInt> Offset1;
6736 std::optional<APInt> Offset2;
6737 if (R1 != R2) {
6738 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
6739 std::optional<ValueAndVReg> MaybeOffset1 =
6740 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6741 if (MaybeOffset1) {
6742 R1 = Add->getLHSReg();
6743 Offset1 = MaybeOffset1->Value;
6744 }
6745 }
6746 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
6747 std::optional<ValueAndVReg> MaybeOffset2 =
6748 getIConstantVRegValWithLookThrough(Add->getRHSReg(), MRI);
6749 if (MaybeOffset2) {
6750 R2 = Add->getLHSReg();
6751 Offset2 = MaybeOffset2->Value;
6752 }
6753 }
6754 }
6755
6756 if (R1 != R2)
6757 return false;
6758
6759 // We calculate the icmp ranges including maybe offsets.
6760 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
6761 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
6762 if (Offset1)
6763 CR1 = CR1.subtract(*Offset1);
6764
6765 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
6766 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
6767 if (Offset2)
6768 CR2 = CR2.subtract(*Offset2);
6769
6770 bool CreateMask = false;
6771 APInt LowerDiff;
6772 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
6773 if (!CR) {
6774 // We need non-wrapping ranges.
6775 if (CR1.isWrappedSet() || CR2.isWrappedSet())
6776 return false;
6777
6778 // Check whether we have equal-size ranges that only differ by one bit.
6779 // In that case we can apply a mask to map one range onto the other.
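// E.g. [0, 4) and [8, 12) differ only in bit 3, so clearing that bit with
// x & ~8 maps both onto [0, 4) and a single unsigned compare suffices.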
6780 LowerDiff = CR1.getLower() ^ CR2.getLower();
6781 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
6782 APInt CR1Size = CR1.getUpper() - CR1.getLower();
6783 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
6784 CR1Size != CR2.getUpper() - CR2.getLower())
6785 return false;
6786
6787 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
6788 CreateMask = true;
6789 }
6790
6791 if (IsAnd)
6792 CR = CR->inverse();
6793
6794 CmpInst::Predicate NewPred;
6795 APInt NewC, Offset;
6796 CR->getEquivalentICmp(NewPred, NewC, Offset);
6797
6798 // We take the result type of one of the original icmps, CmpTy, for
6799 // the to be build icmp. The operand type, CmpOperandTy, is used for
6800 // the other instructions and constants to be build. The types of
6801 // the parameters and output are the same for add and and. CmpTy
6802 // and the type of DstReg might differ. That is why we zext or trunc
6803 // the icmp into the destination register.
6804
6805 MatchInfo = [=](MachineIRBuilder &B) {
6806 if (CreateMask && Offset != 0) {
6807 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6808 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6809 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6810 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
6811 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6812 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6813 B.buildZExtOrTrunc(DstReg, ICmp);
6814 } else if (CreateMask && Offset == 0) {
6815 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
6816 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
6817 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6818 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
6819 B.buildZExtOrTrunc(DstReg, ICmp);
6820 } else if (!CreateMask && Offset != 0) {
6821 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
6822 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
6823 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6824 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
6825 B.buildZExtOrTrunc(DstReg, ICmp);
6826 } else if (!CreateMask && Offset == 0) {
6827 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
6828 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
6829 B.buildZExtOrTrunc(DstReg, ICmp);
6830 } else {
6831 llvm_unreachable("unexpected configuration of CreateMask and Offset");
6832 }
6833 };
6834 return true;
6835}
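// Editorial illustration (hedged sketch, not part of the upstream source): two
// concrete instances of the range-based fold above, written as generic MIR with
// hypothetical virtual-register names and s32 operands.
//
// 1) Exact union after looking through a constant offset:
//      %c5:_(s32)  = G_CONSTANT i32 5
//      %cm5:_(s32) = G_CONSTANT i32 -5
//      %t:_(s32)   = G_ADD %x, %cm5
//      %a:_(s1)    = G_ICMP intpred(ult), %x:_(s32), %c5
//      %b:_(s1)    = G_ICMP intpred(ult), %t:_(s32), %c5
//      %r:_(s1)    = G_OR %a, %b
//    CR1 = [0,5) and CR2 = [0,5) - (-5) = [5,10) union exactly to [0,10), so
//    the whole expression becomes a single compare:
//      %c10:_(s32) = G_CONSTANT i32 10
//      %r:_(s1)    = G_ICMP intpred(ult), %x:_(s32), %c10
//
// 2) Equal-size ranges that differ in a single bit (the CreateMask path):
//    for (x != 5) && (x != 7) the compared constants differ only in bit 1, so
//    that bit is masked off and one compare covers both values:
//      %mask:_(s32) = G_CONSTANT i32 -3        ; ~2
//      %five:_(s32) = G_CONSTANT i32 5
//      %m:_(s32)    = G_AND %x, %mask
//      %r:_(s1)     = G_ICMP intpred(ne), %m:_(s32), %five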
6836
6837bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
6838 BuildFnTy &MatchInfo) {
6839 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
6840 Register DestReg = Logic->getReg(0);
6841 Register LHS = Logic->getLHSReg();
6842 Register RHS = Logic->getRHSReg();
6843 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
6844
6845 // We need a compare on the LHS register.
6846 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
6847 if (!Cmp1)
6848 return false;
6849
6850 // We need a compare on the RHS register.
6851 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
6852 if (!Cmp2)
6853 return false;
6854
6855 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
6856 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
6857
6858 // We build one fcmp. To fold the fcmps and replace the logic op, the
6859 // fcmps must have a single use and the same shape.
6860 if (!isLegalOrBeforeLegalizer(
6861 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
6862 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
6863 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
6864 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
6865 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
6866 return false;
6867
6868 CmpInst::Predicate PredL = Cmp1->getCond();
6869 CmpInst::Predicate PredR = Cmp2->getCond();
6870 Register LHS0 = Cmp1->getLHSReg();
6871 Register LHS1 = Cmp1->getRHSReg();
6872 Register RHS0 = Cmp2->getLHSReg();
6873 Register RHS1 = Cmp2->getRHSReg();
6874
6875 if (LHS0 == RHS1 && LHS1 == RHS0) {
6876 // Swap RHS operands to match LHS.
6877 PredR = CmpInst::getSwappedPredicate(PredR);
6878 std::swap(RHS0, RHS1);
6879 }
6880
6881 if (LHS0 == RHS0 && LHS1 == RHS1) {
6882 // We determine the new predicate.
6883 unsigned CmpCodeL = getFCmpCode(PredL);
6884 unsigned CmpCodeR = getFCmpCode(PredR);
6885 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
6886 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
6887 MatchInfo = [=](MachineIRBuilder &B) {
6888 // The fcmp predicates fill the lower part of the enum.
6889 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
6890 if (Pred == FCmpInst::FCMP_FALSE &&
6891 isConstantLegalOrBeforeLegalizer(CmpTy)) {
6892 auto False = B.buildConstant(CmpTy, 0);
6893 B.buildZExtOrTrunc(DestReg, False);
6894 } else if (Pred == FCmpInst::FCMP_TRUE &&
6895 isConstantLegalOrBeforeLegalizer(CmpTy)) {
6896 auto True =
6897 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
6898 CmpTy.isVector() /*isVector*/,
6899 true /*isFP*/));
6900 B.buildZExtOrTrunc(DestReg, True);
6901 } else { // We take the predicate without predicate optimizations.
6902 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
6903 B.buildZExtOrTrunc(DestReg, Cmp);
6904 }
6905 };
6906 return true;
6907 }
6908
6909 return false;
6910}
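// Editorial illustration (hedged sketch, not part of the upstream source): the
// fcmp codes of the two compares are combined with & for G_AND and | for G_OR.
// With hypothetical s32 operands %x and %y:
//      %a:_(s1) = G_FCMP floatpred(olt), %x:_(s32), %y
//      %b:_(s1) = G_FCMP floatpred(ogt), %x:_(s32), %y
//      %o:_(s1) = G_OR %a, %b      ; olt | ogt == one
//   -> %o:_(s1) = G_FCMP floatpred(one), %x:_(s32), %y
//      %n:_(s1) = G_AND %a, %b     ; olt & ogt == false
//   -> %n:_(s1) = G_CONSTANT i1 0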
6911
6912bool CombinerHelper::matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) {
6913 GAnd *And = cast<GAnd>(&MI);
6914
6915 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
6916 return true;
6917
6918 if (tryFoldLogicOfFCmps(And, MatchInfo))
6919 return true;
6920
6921 return false;
6922}
6923
6924bool CombinerHelper::matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) {
6925 GOr *Or = cast<GOr>(&MI);
6926
6927 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
6928 return true;
6929
6930 if (tryFoldLogicOfFCmps(Or, MatchInfo))
6931 return true;
6932
6933 return false;
6934}
6935
6936bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
6937 GAddCarryOut *Add = cast<GAddCarryOut>(&MI);
6938
6939 // Addo has no flags
6940 Register Dst = Add->getReg(0);
6941 Register Carry = Add->getReg(1);
6942 Register LHS = Add->getLHSReg();
6943 Register RHS = Add->getRHSReg();
6944 bool IsSigned = Add->isSigned();
6945 LLT DstTy = MRI.getType(Dst);
6946 LLT CarryTy = MRI.getType(Carry);
6947
6948 // We want to fold the [u|s]addo.
6949 if (!MRI.hasOneNonDBGUse(Dst))
6950 return false;
6951
6952 // Fold addo, if the carry is dead -> add, undef.
6953 if (MRI.use_nodbg_empty(Carry) &&
6954 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
6955 MatchInfo = [=](MachineIRBuilder &B) {
6956 B.buildAdd(Dst, LHS, RHS);
6957 B.buildUndef(Carry);
6958 };
6959 return true;
6960 }
6961
6962 // We want to fold the [u|s]addo.
6963 if (!MRI.hasOneNonDBGUse(Carry))
6964 return false;
6965
6966 // Canonicalize constant to RHS.
6967 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
6968 if (IsSigned) {
6969 MatchInfo = [=](MachineIRBuilder &B) {
6970 B.buildSAddo(Dst, Carry, RHS, LHS);
6971 };
6972 return true;
6973 }
6974 // !IsSigned
6975 MatchInfo = [=](MachineIRBuilder &B) {
6976 B.buildUAddo(Dst, Carry, RHS, LHS);
6977 };
6978 return true;
6979 }
6980
6981 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
6982 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
6983
6984 // Fold addo(c1, c2) -> c3, carry.
6985 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
6986 isConstantLegalOrBeforeLegalizer(CarryTy)) {
6987 bool Overflow;
6988 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
6989 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
6990 MatchInfo = [=](MachineIRBuilder &B) {
6991 B.buildConstant(Dst, Result);
6992 B.buildConstant(Carry, Overflow);
6993 };
6994 return true;
6995 }
6996
6997 // Fold (addo x, 0) -> x, no carry
6998 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
6999 MatchInfo = [=](MachineIRBuilder &B) {
7000 B.buildCopy(Dst, LHS);
7001 B.buildConstant(Carry, 0);
7002 };
7003 return true;
7004 }
7005
7006 // Given 2 constant operands whose sum does not overflow:
7007 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
7008 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
7009 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
7010 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
7011 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
7012 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
7013 std::optional<APInt> MaybeAddRHS =
7014 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
7015 if (MaybeAddRHS) {
7016 bool Overflow;
7017 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
7018 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
7019 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
7020 if (IsSigned) {
7021 MatchInfo = [=](MachineIRBuilder &B) {
7022 auto ConstRHS = B.buildConstant(DstTy, NewC);
7023 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7024 };
7025 return true;
7026 }
7027 // !IsSigned
7028 MatchInfo = [=](MachineIRBuilder &B) {
7029 auto ConstRHS = B.buildConstant(DstTy, NewC);
7030 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
7031 };
7032 return true;
7033 }
7034 }
7035 };
7036
7037 // We try to combine addo into a non-overflowing add.
7038 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
7039 !isConstantLegalOrBeforeLegalizer(CarryTy))
7040 return false;
7041
7042 // We try to combine uaddo into a non-overflowing add.
7043 if (!IsSigned) {
7044 ConstantRange CRLHS =
7045 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/false);
7046 ConstantRange CRRHS =
7047 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/false);
7048
7049 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
7050 case ConstantRange::OverflowResult::MayOverflow:
7051 return false;
7052 case ConstantRange::OverflowResult::NeverOverflows: {
7053 MatchInfo = [=](MachineIRBuilder &B) {
7054 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
7055 B.buildConstant(Carry, 0);
7056 };
7057 return true;
7058 }
7059 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7060 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7061 MatchInfo = [=](MachineIRBuilder &B) {
7062 B.buildAdd(Dst, LHS, RHS);
7063 B.buildConstant(Carry, 1);
7064 };
7065 return true;
7066 }
7067 }
7068 return false;
7069 }
7070
7071 // We try to combine saddo into a non-overflowing add.
7072
7073 // If LHS and RHS each have at least two sign bits, then there is no signed
7074 // overflow.
7075 if (KB->computeNumSignBits(RHS) > 1 && KB->computeNumSignBits(LHS) > 1) {
7076 MatchInfo = [=](MachineIRBuilder &B) {
7077 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7078 B.buildConstant(Carry, 0);
7079 };
7080 return true;
7081 }
7082
7083 ConstantRange CRLHS =
7084 ConstantRange::fromKnownBits(KB->getKnownBits(LHS), /*IsSigned=*/true);
7085 ConstantRange CRRHS =
7086 ConstantRange::fromKnownBits(KB->getKnownBits(RHS), /*IsSigned=*/true);
7087
7088 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
7089 case ConstantRange::OverflowResult::MayOverflow:
7090 return false;
7091 case ConstantRange::OverflowResult::NeverOverflows: {
7092 MatchInfo = [=](MachineIRBuilder &B) {
7093 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
7094 B.buildConstant(Carry, 0);
7095 };
7096 return true;
7097 }
7098 case ConstantRange::OverflowResult::AlwaysOverflowsLow:
7099 case ConstantRange::OverflowResult::AlwaysOverflowsHigh: {
7100 MatchInfo = [=](MachineIRBuilder &B) {
7101 B.buildAdd(Dst, LHS, RHS);
7102 B.buildConstant(Carry, 1);
7103 };
7104 return true;
7105 }
7106 }
7107
7108 return false;
7109}
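// Editorial illustration (hedged sketch, not part of the upstream source): the
// known-bits path above. If %a and %b are, say, both zero-extended from s16,
// their unsigned ranges are [0, 2^16), so unsignedAddMayOverflow reports
// NeverOverflows and
//      %d:_(s32), %carry:_(s1) = G_UADDO %a, %b
// is rewritten as
//      %d:_(s32)    = nuw G_ADD %a, %b
//      %carry:_(s1) = G_CONSTANT i1 0
// The signed case follows the same reasoning: two known sign bits per operand
// bound each value to [-2^30, 2^30), and the sum of two such values cannot
// leave the signed s32 range [-2^31, 2^31).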
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may be fold it's address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
const fltSemantics & getSemantics() const
Definition: APFloat.h:1303
bool isNaN() const
Definition: APFloat.h:1293
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition: APFloat.h:1096
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt multiplicativeInverse(const APInt &modulo) const
Computes the multiplicative inverse of this APInt for a given modulo.
Definition: APInt.cpp:1250
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1672
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
int32_t exactLogBase2() const
Definition: APInt.h:1725
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:812
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
unsigned countl_one() const
Count the number of leading one bits.
Definition: APInt.h:1565
bool isMask(unsigned numBits) const
Definition: APInt.h:466
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
bool isOne() const
Determine if this is a value of 1.
Definition: APInt.h:367
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition: InstrTypes.h:1216
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition: InstrTypes.h:982
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:994
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:995
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:971
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition: InstrTypes.h:980
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:969
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:970
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:989
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:979
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:977
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:972
@ ICMP_EQ
equal
Definition: InstrTypes.h:986
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:993
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:991
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:978
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition: InstrTypes.h:967
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1128
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:1090
static bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyUDivByConst(MachineInstr &MI)
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops)
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
bool matchPtrAddZero(MachineInstr &MI)
}
bool matchAllExplicitUsesAreUndef(MachineInstr &MI)
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx)
Delete MI and replace all of its uses with its OpIdx-th operand.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUDivByConst(MachineInstr &MI)
Combine G_UDIV by constant into a multiply by magic constant.
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI)
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchShiftsTooBig(MachineInstr &MI)
Match shifts greater or equal to the bitwidth of the operation.
bool tryCombineCopy(MachineInstr &MI)
If MI is COPY, try to combine it.
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
bool matchUndefStore(MachineInstr &MI)
Return true if a G_STORE instruction MI is storing an undef value.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchRedundantSExtInReg(MachineInstr &MI)
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo)
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform: (x + y) - y -> x (x + y) - x -> y x - (y + x) -> 0 - y x - (x + z) -> 0 - z.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo)
Do constant FP folding when opportunities are exposed after MIR building.
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI)
void applyCommuteBinOpOperands(MachineInstr &MI)
bool matchBinOpSameVal(MachineInstr &MI)
Optimize (x op x) -> x.
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineCopy(MachineInstr &MI)
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx)
Return true if a G_SELECT instruction MI has a constant comparison.
void eraseInst(MachineInstr &MI)
Erase MI.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchAddSubSameReg(MachineInstr &MI, Register &Src)
Transform G_ADD(x, G_SUB(y, x)) to y.
void applyRotateOutOfRange(MachineInstr &MI)
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchRotateOutOfRange(MachineInstr &MI)
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
void applyCombineShuffleVector(MachineInstr &MI, const ArrayRef< Register > Ops)
Replace MI with a concat_vectors with Ops.
const TargetLowering & getTargetLowering() const
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
void applyPtrAddZero(MachineInstr &MI)
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo)
void setRegBank(Register Reg, const RegisterBank *RegBank)
Set the register bank of Reg.
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement)
void replaceInstWithConstant(MachineInstr &MI, int64_t C)
Replace an instruction with a G_CONSTANT with value C.
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
Match ashr (shl x, C), C -> sext_inreg (C)
bool tryCombineExtendingLoads(MachineInstr &MI)
If MI is extend that consumes the result of a load, try to combine it.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount)
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo)
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applySDivByConst(MachineInstr &MI)
bool matchUndefSelectCmp(MachineInstr &MI)
Return true if a G_SELECT instruction MI has an undef comparison.
void replaceInstWithUndef(MachineInstr &MI)
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantOr(MachineInstr &MI, Register &Replacement)
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is undef.
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo)
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst)
void replaceInstWithFConstant(MachineInstr &MI, double C)
Replace an instruction with a G_FCONSTANT with value C.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo)
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2)
Return true if MOP1 and MOP2 are register operands are defined by equivalent instructions.
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
Fold (shift (shift base, x), y) -> (shift base (x+y))
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo)
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*MULO x, 0) -> 0 + no carry out.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement)
Delete MI and replace all of its uses with Replacement.
bool matchFunnelShiftToRotate(MachineInstr &MI)
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
Combine inverting a result of a compare into the opposite cond code.
void applyCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is known to be a power of 2.
void applyCombineCopy(MachineInstr &MI)
void applyCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
bool matchAnyExplicitUseIsUndef(MachineInstr &MI)
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops)
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
bool matchSextTruncSextLoad(MachineInstr &MI)
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
GISelKnownBits * KB
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
MachineInstr * buildSDivUsingMul(MachineInstr &MI)
Given an G_SDIV MI expressing a signed divide by constant, return an expression that implements it by...
void applyFunnelShiftConstantModulo(MachineInstr &MI)
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool isPreLegalize() const
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo)
Match (and (load x), mask) -> zextload x.
bool matchConstantOp(const MachineOperand &MOP, int64_t C)
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ands.
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg)
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate)
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool matchConstantFPOp(const MachineOperand &MOP, double C)
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo)
Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool matchSelectSameVal(MachineInstr &MI)
Optimize (cond ? x : x) -> x.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst)
Transform fp_instr(cst) to constant result of the fp operation.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo)
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo)
Try to reassociate to reassociate operands of a commutative binop.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo)
Fold (xor (and x, y), y) -> (and (not x), y) {.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info)
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData)
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo)
Constant fold G_FMA/G_FMAD.
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo)
bool isLegal(const LegalityQuery &Query) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine selects.
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts)
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo)
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg)
Transform anyext(trunc(x)) to x.
void applySimplifyURemByPow2(MachineInstr &MI)
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
MachineRegisterInfo & MRI
void applyUMulHToLShr(MachineInstr &MI)
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo)
Match expression trees of the form.
bool matchShuffleToExtract(MachineInstr &MI)
bool matchUndefShuffleVectorMask(MachineInstr &MI)
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal)
Transform a multiply by a power-of-2 value to a left shift.
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo)
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo)
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo)
Fold away a merge of an unmerge of the corresponding values.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI)
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx)
Checks if constant at ConstIdx is larger than MI 's bitwidth.
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelKnownBits *KB=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
bool matchCombineTruncOfExt(MachineInstr &MI, std::pair< Register, unsigned > &MatchInfo)
Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg)
bool matchUMulHToLShr(MachineInstr &MI)
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI)
Returns true if DefMI dominates UseMI.
MachineInstr * buildUDivUsingMul(MachineInstr &MI)
Given an G_UDIV MI expressing a divide by constant, return an expression that implements it by multip...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg)
Transform zext(trunc(x)) to x.
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData)
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false)
const LegalizerInfo * LI
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI)
void applyShuffleToExtract(MachineInstr &MI)
MachineDominatorTree * MDT
bool matchSDivByConst(MachineInstr &MI)
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo)
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo)
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
const RegisterBankInfo * RBI
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo)
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI)
const TargetRegisterInfo * TRI
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg)
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo)
GISelChangeObserver & Observer
bool matchCombineExtOfExt(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo)
Match sext_inreg(load p), imm -> sextload p.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo)
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute)
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine ors.
void applyFunnelShiftToRotate(MachineInstr &MI)
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands)
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond)
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo)
Combine addos.
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg)
Transform PtrToInt(IntToPtr(x)) to x.
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal)
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchCommuteConstantToRHS(MachineInstr &MI)
Match constant LHS ops that should be commuted.
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo)
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI)
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo)
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Replace MI with a series of instructions described in MatchInfo.
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo)
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
MachineIRBuilder & Builder
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo)
Match: shr (and x, n), k -> ubfx x, pos, width.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo)
Reassociate commutative binary operations like G_ADD.
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo)
Push a binary operator through a select on constants.
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo)
Do constant folding when opportunities are exposed after MIR building.
bool matchOperandIsZero(MachineInstr &MI, unsigned OpIdx)
Check if operand OpIdx is zero.
bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo)
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo)
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo)
void applySextTruncSextLoad(MachineInstr &MI)
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo)
bool matchCommuteFPConstantToRHS(MachineInstr &MI)
Match constant LHS FP ops that should be commuted.
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
const APFloat & getValue() const
Definition: Constants.h:311
const APFloat & getValueAPF() const
Definition: Constants.h:310
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
This class represents a range of values.
Definition: ConstantRange.h:47
std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isBigEndian() const
Definition: DataLayout.h:239
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
unsigned size() const
Definition: DenseMap.h:99
iterator end()
Definition: DenseMap.h:84
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:342
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_FCMP.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
void finishedChangingAllUsesOfReg()
All instructions reported as changing by changingAllUsesOfReg() have finished being changed.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
void changingAllUsesOfReg(const MachineRegisterInfo &MRI, Register Reg)
All the instructions using the given register are being changed.
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
KnownBits getKnownBits(Register R)
APInt getKnownZeroes(Register R)
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents a G_IMPLICIT_DEF.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic or volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:182
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
Definition: LowLevelType.h:178
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
bool isLegalOrCustom(const LegalityQuery &Query) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
LLVMContext & getContext() const
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildExtOrTrunc(unsigned ExtOpc, const DstOp &Res, const SrcOp &Op)
Build and insert Res = ExtOpc, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of...
void setDebugLoc(const DebugLoc &DL)
Set the debug location to DL for all the next build instructions.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:544
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:327
int findRegisterDefOperandIdx(Register Reg, bool isDead=false, bool Overlap=false, const TargetRegisterInfo *TRI=nullptr) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:377
void cloneMemRefs(MachineFunction &MF, const MachineInstr &MI)
Clone another MachineInstr's memory reference descriptor list and replace ours with it.
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:547
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:372
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
void setRegClassOrRegBank(Register Reg, const RegClassOrRegBank &RCOrRB)
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool hasOneUse(Register RegNo) const
hasOneUse - Return true if there is exactly one instruction using the specified register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
bool constrainRegAttrs(Register Reg, Register ConstrainingReg, unsigned MinNumRegs=0)
Constrain the register class or the register bank of the virtual register Reg (and low-level type) to...
iterator_range< use_iterator > use_operands(Register Reg) const
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
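These MachineRegisterInfo queries are the backbone of most combines: walk from a use to its def, check use counts, and rewrite registers. A sketch, assuming MRI is the function's MachineRegisterInfo and Src/Replacement are virtual registers of the same type:
  if (MRI.getType(Src).isScalar() && MRI.hasOneNonDBGUse(Src)) {
    MachineInstr *Def = MRI.getVRegDef(Src);     // the single defining instruction
    if (Def && canReplaceReg(Src, Replacement, MRI))
      MRI.replaceRegWith(Src, Replacement);      // forward every use of Src
  }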
This class implements the register bank concept.
Definition: RegisterBank.h:28
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition: SmallPtrSet.h:94
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
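The ADT containers above are what the combines use for small, deterministic worklists. A tiny sketch with assumed values:
  SmallSetVector<MachineInstr *, 8> Worklist;    // ordered, de-duplicating
  Worklist.insert(&MI);                          // returns false on duplicates
  SmallVector<Register, 4> SrcRegs;
  SrcRegs.push_back(Src);
  if (!SrcRegs.empty() && Worklist.count(&MI))
    SrcRegs.resize(SrcRegs.size() - 1);          // shrink in place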
virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const
Given the generic extension instruction ExtMI, returns true if this extension is a likely candidate f...
virtual bool produceSameValue(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo *MRI=nullptr) const
Return true if two machine instructions would produce identical values.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0, SDValue N1) const
virtual const TargetLowering * getTargetLowering() const
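A hedged sketch of querying one of these target hooks; TLI, MF, MemTy, Offset, and AddrSpace are assumed to be in scope, and the base+offset mode is just an example:
  TargetLoweringBase::AddrMode AM;
  AM.HasBaseReg = true;
  AM.BaseOffs = Offset;
  Type *AccessTy = getTypeForLLT(MemTy, MF.getFunction().getContext());
  bool LegalAM =
      TLI.isLegalAddressingMode(MF.getDataLayout(), AM, AccessTy, AddrSpace);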
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Value(Type *Ty, unsigned scid)
Definition: Value.cpp:53
self_iterator getIterator()
Definition: ilist_node.h:109
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
operand_type_match m_Reg()
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(int64_t RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
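The MIPatternMatch combinators above are how the matchers in this file express their patterns declaratively. A representative sketch, assuming Dst and MRI are in scope:
  // Match Dst = G_ADD (G_SHL X, C1), Y and capture the pieces.
  Register X, Y;
  APInt C1;
  if (mi_match(Dst, MRI,
               m_GAdd(m_GShl(m_Reg(X), m_ICst(C1)), m_Reg(Y)))) {
    // X, Y and C1 are now bound; m_GAdd is commutative, so the operands
    // may appear in either order.
  }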
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition: Utils.cpp:1343
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
static double log2(double V)
const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:438
EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx)
std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition: Utils.cpp:293
std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1303
bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition: Utils.cpp:1456
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:713
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to have exactly one bit set when defined.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1426
bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition: Utils.cpp:1438
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition: Utils.cpp:465
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1471
bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition: Utils.cpp:1503
std::function< void(MachineIRBuilder &)> BuildFnTy
std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:644
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition: Utils.cpp:1406
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition: Utils.cpp:199
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition: Utils.cpp:1336
std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:932
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition: Utils.cpp:427
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition: Utils.cpp:1528
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition: Utils.cpp:446
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isKnownNeverNaN(const Value *V, unsigned Depth, const SimplifyQuery &SQ)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition: Utils.cpp:472
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition: Utils.cpp:1321
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
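Many of the constant-folding helpers above follow the same look-through-copies pattern. A sketch, assuming Reg and MRI are in scope:
  // Walk through copy-like chains to find a G_CONSTANT, if any.
  if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI)) {
    const APInt &Cst = ValAndVReg->Value;
    if (Cst.isPowerOf2()) {
      // e.g. a multiply by Cst could become a shift by log2(Cst).
    }
  }
  // Or fold copies away structurally instead of numerically:
  MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
  if (Def && isNullOrNullSplat(*Def, MRI)) {
    // Reg is known to be zero (scalar 0 or an all-zero splat vector).
  }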
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition: Utils.h:224
Extended Value Type.
Definition: ValueTypes.h:34
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
static std::optional< bool > eq(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_EQ result.
Definition: KnownBits.cpp:482
static std::optional< bool > ne(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_NE result.
Definition: KnownBits.cpp:490
static std::optional< bool > sge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGE result.
Definition: KnownBits.cpp:530
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:244
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
static std::optional< bool > ugt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGT result.
Definition: KnownBits.cpp:496
static std::optional< bool > slt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLT result.
Definition: KnownBits.cpp:536
static std::optional< bool > ult(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULT result.
Definition: KnownBits.cpp:512
static std::optional< bool > ule(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_ULE result.
Definition: KnownBits.cpp:516
static std::optional< bool > sle(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SLE result.
Definition: KnownBits.cpp:540
static std::optional< bool > sgt(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_SGT result.
Definition: KnownBits.cpp:520
static std::optional< bool > uge(const KnownBits &LHS, const KnownBits &RHS)
Determine if these known bits always give the same ICMP_UGE result.
Definition: KnownBits.cpp:506
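The KnownBits predicates above answer a whole comparison when enough bits are known. A sketch, assuming KB is the combiner's GISelKnownBits analysis and LHS/RHS are registers:
  KnownBits LHSKnown = KB->getKnownBits(LHS);
  KnownBits RHSKnown = KB->getKnownBits(RHS);
  if (std::optional<bool> Res = KnownBits::ult(LHSKnown, RHSKnown)) {
    // The ICMP_ULT result is the compile-time constant *Res, so the compare
    // can be replaced by a constant of the target's boolean contents.
  }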
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
MachineInstr * MI
const RegisterBank * Bank
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Magic data for optimising unsigned division by a constant.
static UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...