#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

  const LLT Ty = MRI.getType(Reg);

  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
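// Rewrite a copy-like intrinsic (WQM, SOFT_WQM, STRICT_WWM, STRICT_WQM) in
// place to the given opcode, then constrain source and destination to a
// common register class; selection fails if the two operands resolve to
// different classes.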
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC =
      TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)
    return false;

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
    return false;

  const MCInstrDesc &MCID = MI.getDesc();

  MI.getOperand(0).setIsEarlyClobber(true);
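// Lower a generic COPY. Destinations on the VCC bank need care: a copy from
// SCC only needs its destination class constrained, while a copy from a
// non-VCC source is turned into a compare-with-zero (V_CMP_NE_U16/U32) after
// masking off all but bit 0 of the source, so the result is a legal lane mask.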
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC =
          TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {

      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC =
          TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =

          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);

    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
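// Copy a VCC lane mask into SCC by comparing the mask against zero; uses the
// 64-bit scalar compare on wave64 when the subtarget has one, and otherwise
// ORs the two halves into a dead 64-bit destination just to set SCC.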
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {

  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {

        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
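// Copy an SCC bit into a VCC lane mask. A known-constant source folds to an
// S_MOV of all-ones or zero; otherwise an S_CSELECT between those two values
// produces the mask, sized to the wavefront (B64 on wave64, B32 on wave32).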
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);

    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
      return false;
  }

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                                 unsigned SubIdx) const {

  Register DstReg = MRI->createVirtualRegister(&SubRC);

  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;

    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
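// Select integer add/sub. The 32-bit scalar case maps straight onto
// S_ADD_U32/S_SUB_U32, and the VALU case onto the no-carry V_ADD/V_SUB_U32_e64
// forms when the subtarget has them (else the carry-out _CO_ forms with an
// unused carry register). 64-bit values are split into halves via
// getSubOperand64, added with an add/addc carry chain, and reassembled with a
// REG_SEQUENCE.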
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();

  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarryInsts()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc =
        Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());

        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
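// Select the overflow-producing add/sub opcodes. When the carry-out lives in
// VCC the instruction is rewritten in place to the VALU carry form. On the
// scalar path the carry-in (operand 4) is first copied into SCC,
// S_ADD(C)/S_SUB(B) computes the sum, and the carry-out is copied back out of
// SCC, or marked dead when it has no non-debug uses.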
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

    if (Subtarget->hasMADIntraFwdBug())
      Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                       : AMDGPU::V_MAD_I64_I32_gfx11_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;

    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));

  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
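// Select G_EXTRACT as a subregister-indexed COPY. Only offsets that are a
// multiple of 32 and results of at most 128 bits are handled, and the source
// class is narrowed to one that actually supports the chosen subregister.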
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                               *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, {}, SubReg);
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, {}, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
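// Select v2x16 build_vector. Two constant elements fold into one 32-bit
// immediate (lo16 | hi16 << 16); a build whose second element is
// G_IMPLICIT_DEF degenerates to a plain COPY. Otherwise the SGPR path picks
// among S_PACK_{LL,LH,HH,HL}_B32_B16 (or an S_LSHR_B32 when the high half is
// a known zero), and the VGPR path merges the halves with V_AND_B32 +
// V_LSHL_OR_B32.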
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

    if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
      MI.setDesc(TII.get(AMDGPU::COPY));

          IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
             RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);

    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

  if (ConstSrc1 && ConstSrc1->Value == 0) {

    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

    MI.eraseFromParent();

  if (STI.hasSPackHL()) {
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
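// Select bitfield extract to V_BFE_I32/V_BFE_U32. By the time selection runs,
// only the 32-bit VGPR form remains; scalar and 64-bit variants were expanded
// in regbankselect, as the asserts below document.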
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)

      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =

    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =

                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;

    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:

    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  case Intrinsic::amdgcn_wave_shuffle:
    return selectWaveShuffleIntrin(I);
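// Map a predicate and operand size onto a VALU compare. The Select helper
// returns the 16-bit opcode (real-true16, fake16, or plain, depending on
// subtarget true16 support), the 32-bit opcode, or the 64-bit opcode.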
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,

    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);

    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);

    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);

    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);

    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);

    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);

    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);

    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);

    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);

    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);

    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);

    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);

    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);

    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);

    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);

    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);

    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);

    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);

    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);

    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);

    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);

    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
                                              unsigned Size) const {

  if (!STI.hasScalarCompareEq64())

      return AMDGPU::S_CMP_LG_U64;

      return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

  if (!STI.hasSALUFloatInsts())

    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
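// Select G_ICMP/G_FCMP. A non-VCC result uses the scalar S_CMP* form (SCC is
// then copied into the result register); a VCC result uses the corresponding
// V_CMP* writing the lane mask directly.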
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())

               .add(I.getOperand(2))

               .add(I.getOperand(3))

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                                 *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);

    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {

    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
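// Select amdgcn.ballot. Constant arguments fold to a move of an all-ones or
// zero wave mask; a non-constant source goes through the S_AND_B64/B32 path
// against the exec mask. A 64-bit ballot on wave32 is widened with a zero
// high half through a REG_SEQUENCE.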
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();

      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {

    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);

                                             AMDGPU::SReg_64RegClass, DL);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {

        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {

          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);
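  // The DS_ORDERED_COUNT immediate packs Offset0 (ordered-count index * 4)
  // into bits [7:0] and Offset1 into bits [15:8], where Offset1 itself
  // encodes wave_release (bit 0), wave_done (bit 1), shader type (bits 3:2),
  // instruction (bit 4), and dword count - 1 (bits 7:6).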
  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {

      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    const MCInstrDesc &InstrDesc = TII.get(Opc);

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))

      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))

      Register UndefReg = MRI->createVirtualRegister(SubRC);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

    std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

    if (!isDSOffsetLegal(PtrBase, Offset)) {
      PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;

  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
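// Select an image intrinsic to a concrete MIMG instruction: compute the data
// dword count from the dmask (atomics and packed d16 adjust it), count VADDR
// registers/dwords to decide between the NSA and contiguous encodings, then
// resolve the final opcode for the subtarget's MIMG encoding generation.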
bool AMDGPUInstructionSelector::selectImageIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;

                        TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {

    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;

      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =

    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {

        UseNSA ? AMDGPU::MIMGEncGfx11NSA
               : AMDGPU::MIMGEncGfx11Default,
        NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {

        UseNSA ? AMDGPU::MIMGEncGfx10NSA
               : AMDGPU::MIMGEncGfx10Default,
        NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {

                                     NumVDataDwords, NumVAddrDwords);

          << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);

                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_buffer_load_async_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_async_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
    return selectBufferLoadLds(I);

  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_load_async_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
  case Intrinsic::amdgcn_global_load_async_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_tensor_load_to_lds:
  case Intrinsic::amdgcn_tensor_store_from_lds:
    return selectTensorLoadStore(I, IntrinsicID);
  case Intrinsic::amdgcn_asyncmark:
  case Intrinsic::amdgcn_wait_asyncmark:

    if (!Subtarget->hasVMemToLDSLoad())

  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {

      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_alloc_vgpr: {

    Register ResReg = I.getOperand(0).getReg();

    MachineInstr *AllocMI = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_ALLOC_VGPR))
                                .add(I.getOperand(2));

    I.eraseFromParent();

    return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);

  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {

      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",

    return selectNamedBarrierInst(I, IntrinsicID);

  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)

          .add(I.getOperand(3))

          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;

    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());

        .addReg(SrcReg, {}, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);

        .addReg(SrcReg, {}, AMDGPU::sub0);

        .addReg(SrcReg, {}, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {

      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));
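// The range check below accepts only masks that can be encoded as SALU/VALU
// inline immediates, which span [-16, 64] for integers, so using the mask as
// an AND operand costs no extra register or literal.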
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

  return &RBI.getRegBankFromRegClass(*RC, LLT());
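// Select G_SEXT/G_ZEXT/G_ANYEXT/G_SEXT_INREG. Any-extends degenerate to a
// COPY (padding the widened part with IMPLICIT_DEF); VGPR sources use
// V_BFE_I32/U32, scalar sources use S_SEXT_I32_I8/I16 or the S_BFE_I32/I64
// variants, and 32-to-64-bit extends build the high half explicitly before a
// REG_SEQUENCE.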
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {

      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

    MachineInstr *ExtI =

      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =

    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

          .addReg(SrcReg, {}, SubReg)

          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

          .addReg(SrcReg, {}, SubReg)
          .addImm(AMDGPU::sub0)

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
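// Scalar f64 fneg/fabs are done as integer ops on the high 32 bits: split
// the source into lo/hi copies, apply S_XOR_B32 (fneg), S_OR_B32 (fneg of a
// folded fabs), or S_AND_B32 (fabs) against a sign-bit mask materialized into
// ConstReg, then rebuild the 64-bit value with REG_SEQUENCE.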
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, {}, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, {}, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());

      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
3066void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3067 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3070 STI.ldsRequiresM0Init()) {
3074 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3079bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3086 if (
Reg.isPhysical())
3090 const unsigned Opcode =
MI.getOpcode();
3092 if (Opcode == AMDGPU::COPY)
3095 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3096 Opcode == AMDGPU::G_XOR)
3101 return GI->is(Intrinsic::amdgcn_class);
3103 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3106bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3108 MachineOperand &CondOp =
I.getOperand(0);
3114 const TargetRegisterClass *ConstrainRC;
3121 if (!isVCC(CondReg, *MRI)) {
3125 CondPhysReg = AMDGPU::SCC;
3126 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3127 ConstrainRC = &AMDGPU::SReg_32RegClass;
3134 const bool Is64 = STI.isWave64();
3135 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3136 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3138 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3139 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3146 CondPhysReg = TRI.getVCC();
3147 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3148 ConstrainRC = TRI.getBoolRC();
3151 if (!MRI->getRegClassOrNull(CondReg))
3152 MRI->setRegClass(CondReg, ConstrainRC);
3154 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3157 .
addMBB(
I.getOperand(1).getMBB());
3159 I.eraseFromParent();
3163bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3165 Register DstReg =
I.getOperand(0).getReg();
3166 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3167 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3168 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3172 return RBI.constrainGenericRegister(
3173 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3176bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3177 Register DstReg =
I.getOperand(0).getReg();
3178 Register SrcReg =
I.getOperand(1).getReg();
3179 Register MaskReg =
I.getOperand(2).getReg();
3180 LLT Ty = MRI->getType(DstReg);
3181 LLT MaskTy = MRI->getType(MaskReg);
3185 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3186 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3187 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3188 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3194 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3198 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3199 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3202 !CanCopyLow32 && !CanCopyHi32) {
3203 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3207 I.eraseFromParent();
3212 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3213 const TargetRegisterClass &RegRC
3214 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3216 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3217 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3218 const TargetRegisterClass *MaskRC =
3219 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3221 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3222 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3223 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3228 "ptrmask should have been narrowed during legalize");
3230 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3236 I.eraseFromParent();
3240 Register HiReg = MRI->createVirtualRegister(&RegRC);
3241 Register LoReg = MRI->createVirtualRegister(&RegRC);
3244 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3245 .
addReg(SrcReg, {}, AMDGPU::sub0);
3246 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3247 .
addReg(SrcReg, {}, AMDGPU::sub1);
3256 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3257 MaskedLo = MRI->createVirtualRegister(&RegRC);
3259 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3260 .
addReg(MaskReg, {}, AMDGPU::sub0);
3261 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3270 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3271 MaskedHi = MRI->createVirtualRegister(&RegRC);
3273 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3274 .
addReg(MaskReg, {}, AMDGPU::sub1);
3275 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3280 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3285 I.eraseFromParent();
3291static std::pair<Register, unsigned>
3298 std::tie(IdxBaseReg,
Offset) =
3300 if (IdxBaseReg == AMDGPU::NoRegister) {
3304 IdxBaseReg = IdxReg;
3311 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3312 return std::pair(IdxReg, SubRegs[0]);
3313 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3316bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3322 LLT DstTy = MRI->getType(DstReg);
3323 LLT SrcTy = MRI->getType(SrcReg);
3325 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3326 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3327 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3331 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3334 const TargetRegisterClass *SrcRC =
3335 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3336 const TargetRegisterClass *DstRC =
3337 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3338 if (!SrcRC || !DstRC)
3340 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3341 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3342 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3345 MachineBasicBlock *BB =
MI.getParent();
3353 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3357 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3360 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3362 .
addReg(SrcReg, {}, SubReg)
3364 MI.eraseFromParent();
3371 if (!STI.useVGPRIndexMode()) {
3372 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3374 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3375 .
addReg(SrcReg, {}, SubReg)
3377 MI.eraseFromParent();
3381 const MCInstrDesc &GPRIDXDesc =
3382 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3388 MI.eraseFromParent();
3393bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3400 LLT VecTy = MRI->getType(DstReg);
3401 LLT ValTy = MRI->getType(ValReg);
3405 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3406 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3407 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3413 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3416 const TargetRegisterClass *VecRC =
3417 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3418 const TargetRegisterClass *ValRC =
3419 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3421 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3422 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3423 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3424 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3427 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3431 std::tie(IdxReg, SubReg) =
3434 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3435 STI.useVGPRIndexMode();
3437 MachineBasicBlock *BB =
MI.getParent();
3441 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3444 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3445 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3450 MI.eraseFromParent();
3454 const MCInstrDesc &GPRIDXDesc =
3455 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3462 MI.eraseFromParent();
3468 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3469 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3470 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3471 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3472 case Intrinsic::amdgcn_load_async_to_lds:
3473 case Intrinsic::amdgcn_global_load_async_lds:
3479bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3480 if (!Subtarget->hasVMemToLDSLoad())
3483 unsigned Size =
MI.getOperand(3).getImm();
3487 const bool HasVIndex =
MI.getNumOperands() == 9;
3491 VIndex =
MI.getOperand(4).getReg();
3495 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3496 std::optional<ValueAndVReg> MaybeVOffset =
3498 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3504 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3505 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3506 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3507 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3510 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3511 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3512 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3513 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3516 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3517 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3518 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3519 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3522 if (!Subtarget->hasLDSLoadB96_B128())
3525 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3526 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3527 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3528 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3531 if (!Subtarget->hasLDSLoadB96_B128())
3534 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3535 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3536 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3537 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3541 MachineBasicBlock *
MBB =
MI.getParent();
3544 .
add(
MI.getOperand(2));
3548 if (HasVIndex && HasVOffset) {
3549 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3550 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3557 }
else if (HasVIndex) {
3559 }
else if (HasVOffset) {
3563 MIB.
add(
MI.getOperand(1));
3564 MIB.
add(
MI.getOperand(5 + OpOffset));
3565 MIB.
add(
MI.getOperand(6 + OpOffset));
3567 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3576 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3581 MachinePointerInfo StorePtrI = LoadPtrI;
3592 MachineMemOperand *StoreMMO =
3598 MI.eraseFromParent();
3611 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3617 return Def->getOperand(1).getReg();
3631 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3639 return Def->getOperand(1).getReg();
3641 if (
VT->signBitIsZero(
Reg))
3642 return matchZeroExtendFromS32(
Reg);
3650AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3652 : matchZeroExtendFromS32(
Reg);
3658AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3660 : matchSignExtendFromS32(
Reg);
3664AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3665 bool IsSigned)
const {
3667 return matchSignExtendFromS32OrS32(
Reg);
3669 return matchZeroExtendFromS32OrS32(
Reg);
3679 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3686 return Def->getOperand(1).getReg();
3691bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3692 if (!Subtarget->hasVMemToLDSLoad())
3696 unsigned Size =
MI.getOperand(3).getImm();
3703 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3706 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3709 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3712 if (!Subtarget->hasLDSLoadB96_B128())
3714 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3717 if (!Subtarget->hasLDSLoadB96_B128())
3719 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3723 MachineBasicBlock *
MBB =
MI.getParent();
3726 .
add(
MI.getOperand(2));
3732 if (!isSGPR(Addr)) {
3734 if (isSGPR(AddrDef->Reg)) {
3735 Addr = AddrDef->Reg;
3736 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3739 if (isSGPR(SAddr)) {
3740 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3741 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3752 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3764 MIB.
add(
MI.getOperand(4));
3766 unsigned Aux =
MI.getOperand(5).getImm();
3770 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3772 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3773 MachinePointerInfo StorePtrI = LoadPtrI;
3782 MachineMemOperand *StoreMMO =
3784 sizeof(int32_t),
Align(4));
3788 MI.eraseFromParent();
3793bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3795 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3797 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3801 const auto isAllZeros = [&](MachineOperand &Opnd) {
3802 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3811 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3812 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3817 MachineBasicBlock *
MBB =
MI.getParent();
3819 .
add(
MI.getOperand(1))
3820 .
add(
MI.getOperand(2));
3822 if (NumGroups >= 4) {
3823 MIB.
add(
MI.getOperand(3))
3824 .
add(
MI.getOperand(4));
3828 .
add(
MI.getOperand(6));
3830 MI.eraseFromParent();
3834bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3836 unsigned OpcodeOpIdx =
3837 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3838 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3839 MI.removeOperand(OpcodeOpIdx);
3840 MI.addImplicitDefUseOperands(*
MI.getMF());
3847bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3850 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3851 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3853 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3854 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3856 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3857 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3859 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3860 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3862 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3863 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3865 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3866 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3868 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3869 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3871 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3872 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3874 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3875 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3877 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3878 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3880 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3881 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3883 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3884 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3886 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3887 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3889 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3890 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3892 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3893 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3895 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3896 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3898 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3899 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3901 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3902 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3904 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3905 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3907 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3908 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3910 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3911 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3913 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3914 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3916 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3917 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3919 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3920 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3922 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3923 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3925 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3926 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3928 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3929 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3931 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3932 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3938 auto VDst_In =
MI.getOperand(4);
3940 MI.setDesc(TII.get(
Opc));
3941 MI.removeOperand(4);
3942 MI.removeOperand(1);
3943 MI.addOperand(VDst_In);
3944 MI.addImplicitDefUseOperands(*
MI.getMF());
3945 const MCInstrDesc &MCID =
MI.getDesc();
3947 MI.getOperand(0).setIsEarlyClobber(
true);
3952bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3954 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3955 !Subtarget->hasPermlane16Swap())
3957 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3958 !Subtarget->hasPermlane32Swap())
3961 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3962 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3963 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3965 MI.removeOperand(2);
3966 MI.setDesc(TII.get(Opcode));
3969 MachineOperand &FI =
MI.getOperand(4);
3976bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
3979 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3980 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3981 MachineBasicBlock *
MBB =
MI.getParent();
3985 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
3986 .
addImm(Subtarget->getWavefrontSizeLog2())
3991 .
addImm(Subtarget->getWavefrontSizeLog2())
3995 const TargetRegisterClass &RC =
3996 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3997 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4000 MI.eraseFromParent();
4004bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4007 MachineBasicBlock *
MBB =
MI.getParent();
4014 const LLT DstTy = MRI->getType(DstReg);
4016 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4017 const TargetRegisterClass *DstRC =
4018 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4023 if (!Subtarget->supportsBPermute())
4027 if (Subtarget->supportsWaveWideBPermute()) {
4028 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4029 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4039 assert(Subtarget->isWave64());
4043 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4044 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4046 Register UndefExecReg = MRI->createVirtualRegister(
4047 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4048 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4050 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4051 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4059 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4060 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4064 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4065 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4073 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4074 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4079 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4080 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4083 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4084 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4089 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4090 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4097 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4098 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4102 Register XORReg = MRI->createVirtualRegister(DstRC);
4107 Register ANDReg = MRI->createVirtualRegister(DstRC);
4112 Register CompareReg = MRI->createVirtualRegister(
4113 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4114 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4119 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4127 MI.eraseFromParent();
4136 unsigned NumOpcodes = 0;
4149 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4160 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4174 if (Src.size() == 3) {
4181 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4182 if (Src[
I] ==
LHS) {
4192 Bits = SrcBits[Src.size()];
4198 switch (
MI->getOpcode()) {
4199 case TargetOpcode::G_AND:
4200 case TargetOpcode::G_OR:
4201 case TargetOpcode::G_XOR: {
4206 if (!getOperandBits(
LHS, LHSBits) ||
4207 !getOperandBits(
RHS, RHSBits)) {
4208 Src = std::move(Backup);
4209 return std::make_pair(0, 0);
4215 NumOpcodes +=
Op.first;
4216 LHSBits =
Op.second;
4221 NumOpcodes +=
Op.first;
4222 RHSBits =
Op.second;
4227 return std::make_pair(0, 0);
4231 switch (
MI->getOpcode()) {
4232 case TargetOpcode::G_AND:
4233 TTbl = LHSBits & RHSBits;
4235 case TargetOpcode::G_OR:
4236 TTbl = LHSBits | RHSBits;
4238 case TargetOpcode::G_XOR:
4239 TTbl = LHSBits ^ RHSBits;
4245 return std::make_pair(NumOpcodes + 1, TTbl);
4248bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4249 if (!Subtarget->hasBitOp3Insts())
4253 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4254 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4260 unsigned NumOpcodes;
4262 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4266 if (NumOpcodes < 2 || Src.empty())
4269 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4270 if (NumOpcodes == 2 && IsB32) {
4278 }
else if (NumOpcodes < 4) {
4285 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4286 if (!IsB32 && STI.hasTrue16BitInsts())
4287 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4288 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4289 unsigned CBL = STI.getConstantBusLimit(
Opc);
4290 MachineBasicBlock *
MBB =
MI.getParent();
4293 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4294 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4295 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4301 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4312 while (Src.size() < 3)
4313 Src.push_back(Src[0]);
4330 MI.eraseFromParent();
4335bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4337 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4340 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4342 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4344 MachineBasicBlock *
MBB =
MI.getParent();
4348 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4351 .
addImm(Subtarget->getWavefrontSizeLog2())
4358 MI.eraseFromParent();
4364 if (!
I.isPreISelOpcode()) {
4366 return selectCOPY(
I);
4370 switch (
I.getOpcode()) {
4371 case TargetOpcode::G_AND:
4372 case TargetOpcode::G_OR:
4373 case TargetOpcode::G_XOR:
4374 if (selectBITOP3(
I))
4378 return selectG_AND_OR_XOR(
I);
4379 case TargetOpcode::G_ADD:
4380 case TargetOpcode::G_SUB:
4381 case TargetOpcode::G_PTR_ADD:
4384 return selectG_ADD_SUB(
I);
4385 case TargetOpcode::G_UADDO:
4386 case TargetOpcode::G_USUBO:
4387 case TargetOpcode::G_UADDE:
4388 case TargetOpcode::G_USUBE:
4389 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4390 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4391 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4392 return selectG_AMDGPU_MAD_64_32(
I);
4393 case TargetOpcode::G_INTTOPTR:
4394 case TargetOpcode::G_BITCAST:
4395 case TargetOpcode::G_PTRTOINT:
4396 case TargetOpcode::G_FREEZE:
4397 return selectCOPY(
I);
4398 case TargetOpcode::G_FNEG:
4401 return selectG_FNEG(
I);
4402 case TargetOpcode::G_FABS:
4405 return selectG_FABS(
I);
4406 case TargetOpcode::G_EXTRACT:
4407 return selectG_EXTRACT(
I);
4408 case TargetOpcode::G_MERGE_VALUES:
4409 case TargetOpcode::G_CONCAT_VECTORS:
4410 return selectG_MERGE_VALUES(
I);
4411 case TargetOpcode::G_UNMERGE_VALUES:
4412 return selectG_UNMERGE_VALUES(
I);
4413 case TargetOpcode::G_BUILD_VECTOR:
4414 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4415 return selectG_BUILD_VECTOR(
I);
4416 case TargetOpcode::G_IMPLICIT_DEF:
4417 return selectG_IMPLICIT_DEF(
I);
4418 case TargetOpcode::G_INSERT:
4419 return selectG_INSERT(
I);
4420 case TargetOpcode::G_INTRINSIC:
4421 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4422 return selectG_INTRINSIC(
I);
4423 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4424 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4425 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4426 case TargetOpcode::G_ICMP:
4427 case TargetOpcode::G_FCMP:
4428 if (selectG_ICMP_or_FCMP(
I))
4431 case TargetOpcode::G_LOAD:
4432 case TargetOpcode::G_ZEXTLOAD:
4433 case TargetOpcode::G_SEXTLOAD:
4434 case TargetOpcode::G_STORE:
4435 case TargetOpcode::G_ATOMIC_CMPXCHG:
4436 case TargetOpcode::G_ATOMICRMW_XCHG:
4437 case TargetOpcode::G_ATOMICRMW_ADD:
4438 case TargetOpcode::G_ATOMICRMW_SUB:
4439 case TargetOpcode::G_ATOMICRMW_AND:
4440 case TargetOpcode::G_ATOMICRMW_OR:
4441 case TargetOpcode::G_ATOMICRMW_XOR:
4442 case TargetOpcode::G_ATOMICRMW_MIN:
4443 case TargetOpcode::G_ATOMICRMW_MAX:
4444 case TargetOpcode::G_ATOMICRMW_UMIN:
4445 case TargetOpcode::G_ATOMICRMW_UMAX:
4446 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4447 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4448 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4449 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4450 case TargetOpcode::G_ATOMICRMW_FADD:
4451 case TargetOpcode::G_ATOMICRMW_FMIN:
4452 case TargetOpcode::G_ATOMICRMW_FMAX:
4453 return selectG_LOAD_STORE_ATOMICRMW(
I);
4454 case TargetOpcode::G_SELECT:
4455 return selectG_SELECT(
I);
4456 case TargetOpcode::G_TRUNC:
4457 return selectG_TRUNC(
I);
4458 case TargetOpcode::G_SEXT:
4459 case TargetOpcode::G_ZEXT:
4460 case TargetOpcode::G_ANYEXT:
4461 case TargetOpcode::G_SEXT_INREG:
4465 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4468 return selectG_SZA_EXT(
I);
4469 case TargetOpcode::G_FPEXT:
4470 if (selectG_FPEXT(
I))
4473 case TargetOpcode::G_BRCOND:
4474 return selectG_BRCOND(
I);
4475 case TargetOpcode::G_GLOBAL_VALUE:
4476 return selectG_GLOBAL_VALUE(
I);
4477 case TargetOpcode::G_PTRMASK:
4478 return selectG_PTRMASK(
I);
4479 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4480 return selectG_EXTRACT_VECTOR_ELT(
I);
4481 case TargetOpcode::G_INSERT_VECTOR_ELT:
4482 return selectG_INSERT_VECTOR_ELT(
I);
4483 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4484 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4485 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4486 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4487 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4490 assert(Intr &&
"not an image intrinsic with image pseudo");
4491 return selectImageIntrinsic(
I, Intr);
4493 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4494 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4495 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4496 return selectBVHIntersectRayIntrinsic(
I);
4497 case AMDGPU::G_SBFX:
4498 case AMDGPU::G_UBFX:
4499 return selectG_SBFX_UBFX(
I);
4500 case AMDGPU::G_SI_CALL:
4501 I.setDesc(TII.get(AMDGPU::SI_CALL));
4503 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4504 return selectWaveAddress(
I);
4505 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4506 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4509 case AMDGPU::G_STACKRESTORE:
4510 return selectStackRestore(
I);
4512 return selectPHI(
I);
4513 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4514 return selectCOPY_SCC_VCC(
I);
4515 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4516 return selectCOPY_VCC_SCC(
I);
4517 case AMDGPU::G_AMDGPU_READANYLANE:
4518 return selectReadAnyLane(
I);
4519 case TargetOpcode::G_CONSTANT:
4520 case TargetOpcode::G_FCONSTANT:
4528AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4535std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4536 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4540 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4541 Src =
MI->getOperand(1).getReg();
4544 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4549 if (
LHS &&
LHS->isZero()) {
4551 Src =
MI->getOperand(2).getReg();
4555 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4556 Src =
MI->getOperand(1).getReg();
4563 return std::pair(Src, Mods);
4566Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4568 bool ForceVGPR)
const {
4569 if ((Mods != 0 || ForceVGPR) &&
4570 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4577 TII.
get(AMDGPU::COPY), VGPRSrc)
4589AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4591 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4596AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4599 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4602 [=](MachineInstrBuilder &MIB) {
4603 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4605 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4606 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4607 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4612AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4615 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4620 [=](MachineInstrBuilder &MIB) {
4621 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4623 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4624 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4625 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4630AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4632 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4633 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4634 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4639AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4642 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4645 [=](MachineInstrBuilder &MIB) {
4646 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4648 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4653AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4657 std::tie(Src, Mods) =
4658 selectVOP3ModsImpl(Root.
getReg(),
false);
4661 [=](MachineInstrBuilder &MIB) {
4662 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4664 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4669AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4672 std::tie(Src, Mods) =
4673 selectVOP3ModsImpl(Root.
getReg(),
true,
4677 [=](MachineInstrBuilder &MIB) {
4678 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4680 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4685AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4688 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4691 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4716 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4721 return DstSize * 2 == SrcSize;
4727 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4731 std::optional<ValueAndVReg> ShiftAmt;
4732 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4735 unsigned Shift = ShiftAmt->Value.getZExtValue();
4736 return Shift * 2 == SrcSize;
4744 if (
MI->getOpcode() != AMDGPU::G_SHL)
4748 std::optional<ValueAndVReg> ShiftAmt;
4749 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4752 unsigned Shift = ShiftAmt->Value.getZExtValue();
4753 return Shift * 2 == SrcSize;
4761 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4763 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4764 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4934static std::optional<std::pair<Register, SrcStatus>>
4939 unsigned Opc =
MI->getOpcode();
4943 case AMDGPU::G_BITCAST:
4944 return std::optional<std::pair<Register, SrcStatus>>(
4945 {
MI->getOperand(1).getReg(), Curr.second});
4947 if (
MI->getOperand(1).getReg().isPhysical())
4948 return std::nullopt;
4949 return std::optional<std::pair<Register, SrcStatus>>(
4950 {
MI->getOperand(1).getReg(), Curr.second});
4951 case AMDGPU::G_FNEG: {
4954 return std::nullopt;
4955 return std::optional<std::pair<Register, SrcStatus>>(
4956 {
MI->getOperand(1).getReg(), Stat});
4963 switch (Curr.second) {
4966 return std::optional<std::pair<Register, SrcStatus>>(
4969 if (Curr.first ==
MI->getOperand(0).getReg())
4970 return std::optional<std::pair<Register, SrcStatus>>(
4972 return std::optional<std::pair<Register, SrcStatus>>(
4984 return std::optional<std::pair<Register, SrcStatus>>(
4988 if (Curr.first ==
MI->getOperand(0).getReg())
4989 return std::optional<std::pair<Register, SrcStatus>>(
4991 return std::optional<std::pair<Register, SrcStatus>>(
4997 return std::optional<std::pair<Register, SrcStatus>>(
5002 return std::optional<std::pair<Register, SrcStatus>>(
5007 return std::optional<std::pair<Register, SrcStatus>>(
5012 return std::optional<std::pair<Register, SrcStatus>>(
5018 return std::nullopt;
5028 bool HasNeg =
false;
5030 bool HasOpsel =
true;
5035 unsigned Opc =
MI->getOpcode();
5037 if (
Opc == TargetOpcode::G_INTRINSIC) {
5040 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5067 while (
Depth <= MaxDepth && Curr.has_value()) {
5070 Statlist.push_back(Curr.value());
5077static std::pair<Register, SrcStatus>
5084 while (
Depth <= MaxDepth && Curr.has_value()) {
5090 LastSameOrNeg = Curr.value();
5095 return LastSameOrNeg;
5102 return Width1 == Width2;
5137 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5138 IsHalfState(HiStat);
5141std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5147 return {RootReg, Mods};
5150 SearchOptions SO(RootReg, MRI);
5161 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5163 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5164 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5166 return {Stat.first, Mods};
5172 if (StatlistHi.
empty()) {
5174 return {Stat.first, Mods};
5180 if (StatlistLo.
empty()) {
5182 return {Stat.first, Mods};
5185 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5186 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5187 if (StatlistHi[
I].first == StatlistLo[J].first &&
5189 StatlistHi[
I].first, RootReg, TII, MRI))
5190 return {StatlistHi[
I].first,
5191 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5197 return {Stat.first, Mods};
5207 return RB->
getID() == RBNo;
5224 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5225 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5229 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5238 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5246AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5251 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5255 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5256 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5261AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5263 return selectVOP3PRetHelper(Root);
5267AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5269 return selectVOP3PRetHelper(Root,
true);
5273AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5276 "expected i1 value");
5282 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5290 switch (Elts.
size()) {
5292 DstRegClass = &AMDGPU::VReg_256RegClass;
5295 DstRegClass = &AMDGPU::VReg_128RegClass;
5298 DstRegClass = &AMDGPU::VReg_64RegClass;
5305 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5307 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5318 if (ModOpcode == TargetOpcode::G_FNEG) {
5322 for (
auto El : Elts) {
5328 if (Elts.size() != NegAbsElts.
size()) {
5337 assert(ModOpcode == TargetOpcode::G_FABS);
5345AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5351 assert(BV->getNumSources() > 0);
5353 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5354 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5357 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5358 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5365 if (BV->getNumSources() == EltsF32.
size()) {
5371 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5372 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5376AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5382 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5390 if (CV->getNumSources() == EltsV2F16.
size()) {
5397 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5398 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5402AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5408 assert(CV->getNumSources() > 0);
5409 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5411 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5415 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5416 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5423 if (CV->getNumSources() == EltsV2F16.
size()) {
5430 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5431 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5435AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5436 std::optional<FPValueAndVReg> FPValReg;
5438 if (TII.isInlineConstant(FPValReg->Value)) {
5439 return {{[=](MachineInstrBuilder &MIB) {
5440 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5450 if (TII.isInlineConstant(ICst)) {
5460AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5466 std::optional<ValueAndVReg> ShiftAmt;
5468 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5469 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5470 Key = ShiftAmt->Value.getZExtValue() / 8;
5475 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5476 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5481AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5488 std::optional<ValueAndVReg> ShiftAmt;
5490 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5491 ShiftAmt->Value.getZExtValue() == 16) {
5497 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5498 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5503AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5510 S32 = matchAnyExtendFromS32(Src);
5514 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5519 Src =
Def->getOperand(2).getReg();
5526 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5527 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5532AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5535 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5539 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5540 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5546AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5549 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5555 [=](MachineInstrBuilder &MIB) {
5557 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5559 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5564AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5567 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5573 [=](MachineInstrBuilder &MIB) {
5575 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5577 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5584bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5586 bool IsSigned)
const {
5587 if (!Subtarget->hasScaleOffset())
5591 MachineMemOperand *MMO = *
MI.memoperands_begin();
5603 OffsetReg =
Def->Reg;
5618 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5622 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5623 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5624 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5625 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5638bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5642 bool *ScaleOffset)
const {
5644 MachineBasicBlock *
MBB =
MI->getParent();
5649 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5651 if (AddrInfo.
empty())
5654 const GEPInfo &GEPI = AddrInfo[0];
5655 std::optional<int64_t> EncodedImm;
5658 *ScaleOffset =
false;
5663 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5664 AddrInfo.
size() > 1) {
5665 const GEPInfo &GEPI2 = AddrInfo[1];
5666 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5667 Register OffsetReg = GEPI2.SgprParts[1];
5670 selectScaleOffset(Root, OffsetReg,
false );
5671 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5673 Base = GEPI2.SgprParts[0];
5674 *SOffset = OffsetReg;
5683 auto SKnown =
VT->getKnownBits(*SOffset);
5684 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5696 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5697 Base = GEPI.SgprParts[0];
5703 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5709 Base = GEPI.SgprParts[0];
5710 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5711 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5716 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5717 Register OffsetReg = GEPI.SgprParts[1];
5719 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5720 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5722 Base = GEPI.SgprParts[0];
5723 *SOffset = OffsetReg;
5732AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5735 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5737 return std::nullopt;
5739 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5740 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5744AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5746 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5748 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5749 return std::nullopt;
5751 const GEPInfo &GEPInfo = AddrInfo[0];
5752 Register PtrReg = GEPInfo.SgprParts[0];
5753 std::optional<int64_t> EncodedImm =
5756 return std::nullopt;
5759 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5760 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5765AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5768 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5770 return std::nullopt;
5773 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5774 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5775 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5779AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5783 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5784 return std::nullopt;
5787 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5788 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5790 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5793std::pair<Register, int>
5794AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5795 uint64_t FlatVariant)
const {
5800 if (!STI.hasFlatInstOffsets())
5804 int64_t ConstOffset;
5806 std::tie(PtrBase, ConstOffset, IsInBounds) =
5807 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5813 if (ConstOffset == 0 ||
5815 !isFlatScratchBaseLegal(Root.
getReg())) ||
5819 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5820 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5823 return std::pair(PtrBase, ConstOffset);
5827AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5831 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5832 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5837AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5841 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5842 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5847AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5851 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5852 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5858AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5860 bool NeedIOffset)
const {
5863 int64_t ConstOffset;
5864 int64_t ImmOffset = 0;
5868 std::tie(PtrBase, ConstOffset, std::ignore) =
5869 getPtrBaseWithConstantOffset(Addr, *MRI);
5871 if (ConstOffset != 0) {
5876 ImmOffset = ConstOffset;
5879 if (isSGPR(PtrBaseDef->Reg)) {
5880 if (ConstOffset > 0) {
5886 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5888 std::tie(SplitImmOffset, RemainderOffset) =
5893 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5896 MachineBasicBlock *
MBB =
MI->getParent();
5898 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5900 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5902 .
addImm(RemainderOffset);
5906 [=](MachineInstrBuilder &MIB) {
5909 [=](MachineInstrBuilder &MIB) {
5912 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5913 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5916 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5917 [=](MachineInstrBuilder &MIB) {
5920 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5930 unsigned NumLiterals =
5931 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5932 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5933 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5934 return std::nullopt;
5941 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5946 if (isSGPR(SAddr)) {
5947 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5951 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5952 Subtarget->hasSignedGVSOffset());
5953 if (
Register VOffset = matchExtendFromS32OrS32(
5954 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5956 return {{[=](MachineInstrBuilder &MIB) {
5959 [=](MachineInstrBuilder &MIB) {
5962 [=](MachineInstrBuilder &MIB) {
5965 [=](MachineInstrBuilder &MIB) {
5969 return {{[=](MachineInstrBuilder &MIB) {
5972 [=](MachineInstrBuilder &MIB) {
5975 [=](MachineInstrBuilder &MIB) {
5985 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5986 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5987 return std::nullopt;
5992 MachineBasicBlock *
MBB =
MI->getParent();
5993 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5995 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6000 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6001 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6002 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6003 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6006 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6007 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6008 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6013AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6014 return selectGlobalSAddr(Root, 0);
6018AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6024 return selectGlobalSAddr(Root, PassedCPol);
6028AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6034 return selectGlobalSAddr(Root, PassedCPol);
6038AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6043AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6050 return selectGlobalSAddr(Root, PassedCPol,
false);
6054AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6061 return selectGlobalSAddr(Root, PassedCPol,
false);
6065AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6068 int64_t ConstOffset;
6069 int64_t ImmOffset = 0;
6073 std::tie(PtrBase, ConstOffset, std::ignore) =
6074 getPtrBaseWithConstantOffset(Addr, *MRI);
6076 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6080 ImmOffset = ConstOffset;
6084 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6085 int FI = AddrDef->MI->getOperand(1).
getIndex();
6088 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6094 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6095 Register LHS = AddrDef->MI->getOperand(1).getReg();
6096 Register RHS = AddrDef->MI->getOperand(2).getReg();
6100 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6101 isSGPR(RHSDef->Reg)) {
6102 int FI = LHSDef->MI->getOperand(1).getIndex();
6106 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6108 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6116 return std::nullopt;
6119 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6120 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6125bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6127 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6133 auto VKnown =
VT->getKnownBits(VAddr);
6136 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6137 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6138 return (VMax & 3) + (
SMax & 3) >= 4;
6142AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6145 int64_t ConstOffset;
6146 int64_t ImmOffset = 0;
6150 std::tie(PtrBase, ConstOffset, std::ignore) =
6151 getPtrBaseWithConstantOffset(Addr, *MRI);
6154 if (ConstOffset != 0 &&
6158 ImmOffset = ConstOffset;
6162 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6163 return std::nullopt;
6165 Register RHS = AddrDef->MI->getOperand(2).getReg();
6166 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6167 return std::nullopt;
6169 Register LHS = AddrDef->MI->getOperand(1).getReg();
6172 if (OrigAddr != Addr) {
6173 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6174 return std::nullopt;
6176 if (!isFlatScratchBaseLegalSV(OrigAddr))
6177 return std::nullopt;
6180 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6181 return std::nullopt;
6183 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6187 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6188 int FI = LHSDef->MI->getOperand(1).getIndex();
6190 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6192 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6193 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6202 return std::nullopt;
6205 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6206 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6207 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6208 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6213AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6215 MachineBasicBlock *
MBB =
MI->getParent();
6217 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6222 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6227 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6231 return {{[=](MachineInstrBuilder &MIB) {
6234 [=](MachineInstrBuilder &MIB) {
6237 [=](MachineInstrBuilder &MIB) {
6242 [=](MachineInstrBuilder &MIB) {
6251 std::optional<int> FI;
6254 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6256 int64_t ConstOffset;
6257 std::tie(PtrBase, ConstOffset, std::ignore) =
6258 getPtrBaseWithConstantOffset(VAddr, *MRI);
6259 if (ConstOffset != 0) {
6260 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6261 (!STI.privateMemoryResourceIsRangeChecked() ||
6262 VT->signBitIsZero(PtrBase))) {
6263 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6264 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6270 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6274 return {{[=](MachineInstrBuilder &MIB) {
6277 [=](MachineInstrBuilder &MIB) {
6283 [=](MachineInstrBuilder &MIB) {
6288 [=](MachineInstrBuilder &MIB) {
6293bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6298 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6303 return VT->signBitIsZero(
Base);
6306bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6308 unsigned Size)
const {
6309 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6314 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6319 return VT->signBitIsZero(
Base);
6324 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6325 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6332bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6340 if (STI.hasSignedScratchOffsets())
6346 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6347 std::optional<ValueAndVReg> RhsValReg =
6353 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6354 RhsValReg->Value.getSExtValue() > -0x40000000)
6358 return VT->signBitIsZero(
LHS);
6363bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6371 if (STI.hasSignedScratchOffsets())
6376 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6381bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6385 if (STI.hasSignedScratchOffsets())
6390 std::optional<DefinitionAndSourceRegister> BaseDef =
6392 std::optional<ValueAndVReg> RHSOffset =
6402 (RHSOffset->Value.getSExtValue() < 0 &&
6403 RHSOffset->Value.getSExtValue() > -0x40000000)))
6406 Register LHS = BaseDef->MI->getOperand(1).getReg();
6407 Register RHS = BaseDef->MI->getOperand(2).getReg();
6408 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6411bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6412 unsigned ShAmtBits)
const {
6413 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6415 std::optional<APInt>
RHS =
6420 if (
RHS->countr_one() >= ShAmtBits)
6423 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6424 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6428AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6431 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6433 std::optional<DefinitionAndSourceRegister>
Def =
6435 assert(Def &&
"this shouldn't be an optional result");
6440 [=](MachineInstrBuilder &MIB) {
6443 [=](MachineInstrBuilder &MIB) {
6446 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6457 if (!TII.isLegalMUBUFImmOffset(
Offset))
6465 [=](MachineInstrBuilder &MIB) {
6468 [=](MachineInstrBuilder &MIB) {
6476 !TII.isLegalMUBUFImmOffset(
Offset))
6480 [=](MachineInstrBuilder &MIB) {
6483 [=](MachineInstrBuilder &MIB) {
6490std::pair<Register, unsigned>
6491AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6492 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6493 int64_t ConstAddr = 0;
6497 std::tie(PtrBase,
Offset, std::ignore) =
6498 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6501 if (isDSOffsetLegal(PtrBase,
Offset)) {
6503 return std::pair(PtrBase,
Offset);
6505 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6514 return std::pair(Root.
getReg(), 0);
6518AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6521 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6523 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6529AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6530 return selectDSReadWrite2(Root, 4);
6534AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6535 return selectDSReadWrite2(Root, 8);
6539AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6540 unsigned Size)
const {
6545 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6547 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
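// A minimal standalone sketch (hypothetical helper, not part of this file)
// of the ds_read2/ds_write2 addressing rule used above, assuming 8-bit
// unsigned offset fields: both fields are scaled by the element size, and
// the second field is the first plus one, so a byte offset is usable only
// if it is Size-aligned and both scaled values fit in 8 bits.
#include <cassert>
#include <cstdint>

static bool isDSOffset2LegalSketch(int64_t ByteOffset, unsigned Size) {
  if (ByteOffset % Size != 0)
    return false;
  int64_t Offset0 = ByteOffset / Size; // value placed in the offset0 field
  int64_t Offset1 = Offset0 + 1;       // rendered as Offset + 1 above
  return Offset0 >= 0 && Offset0 <= 255 && Offset1 <= 255;
}

int main() {
  assert(isDSOffset2LegalSketch(1016, 4));  // fields 254 and 255: fits
  assert(!isDSOffset2LegalSketch(1020, 4)); // second field would be 256
  assert(!isDSOffset2LegalSketch(6, 4));    // not 4-byte aligned
  return 0;
}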
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0, false};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue(),
          isNoUnsignedWrap(RootI)};
}
/// Return a resource descriptor for use with an arbitrary 64-bit pointer. If
/// BasePtr is not valid, a null base pointer will be used.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc2).addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32).addDef(RSrc3).addImm(FormatHi);

  // Combine the format constants into the high 64 bits of the descriptor.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2).addImm(AMDGPU::sub0)
      .addReg(RSrc3).addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64).addDef(RSrcLo).addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo).addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi).addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // addr64 bit was removed for volcanic islands.
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  return buildRSRC(B, MRI, Lo_32(DefaultFormat), Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset, std::ignore) =
      getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  // (N0 + N2) or (N0 + (N2 + N3)) forms.
  if (MachineInstr *InputAdd =
          getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
  }

  return Data;
}
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // Only use the addr64 form if the base pointer is divergent and must live
  // in a VGPR; a uniform base can go through the resource descriptor.
  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}
/// Split an immediate offset into a legal MUBUF immediate plus an soffset
/// register when the raw value does not fit the offset field.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in a register.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32).addDef(SOffset).addImm(ImmOffset);
  ImmOffset = 0;
}
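// A minimal standalone sketch (hypothetical names, not part of this file) of
// the fallback above: an offset that does not fit the MUBUF immediate field
// is moved wholesale into soffset and the immediate becomes zero; hardware
// adds the two fields when forming the address. The 4095 bound stands in for
// TII.isLegalMUBUFImmOffset on most pre-GFX12 targets.
#include <cassert>
#include <cstdint>

struct MUBUFOffsetSketch {
  uint32_t SOffset;  // value an S_MOV_B32 would materialize (0 = unused)
  int64_t ImmOffset; // value left for the instruction's offset field
};

static MUBUFOffsetSketch splitOffsetSketch(int64_t ImmOffset) {
  const int64_t MaxImm = 4095;
  if (ImmOffset >= 0 && ImmOffset <= MaxImm)
    return {0, ImmOffset}; // already legal, leave as-is
  return {(uint32_t)ImmOffset, 0};
}

int main() {
  assert(splitOffsetSketch(4095).ImmOffset == 4095);
  assert(splitOffsetSketch(8192).SOffset == 8192);
  assert(splitOffsetSketch(8192).ImmOffset == 0);
  return 0;
}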
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0, N2 = AddrData.N2, N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent: use N0, the result of the add, as the
        // addr64, and build the rsrc from a null pointer.
        VAddr = N0;
      } else {
        SRDPtr = N3; // N2 is divergent, N3 is uniform.
        VAddr = N2;
      }
    } else {
      SRDPtr = N2; // N2 is uniform.
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    VAddr = N0; // Use the default null pointer in the resource.
  } else {
    SRDPtr = N0; // N0 -> offset, or (N0 + C1) -> offset
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  if (STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr, RSrcReg, SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VAddr); },   // vaddr
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg, SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RSrcReg); }, // rsrc
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}
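// A minimal standalone sketch (hypothetical name, not part of this file) of
// the sign/zero-extension subtlety above: G_CONSTANT values are reported
// sign-extended, but the 32-bit payload is consumed as if zero-extended, so
// the helper re-interprets the low 32 bits as unsigned.
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<uint64_t> constantZext32Sketch(int64_t SExtVal) {
  if (SExtVal < INT32_MIN || SExtVal > INT32_MAX)
    return std::nullopt;              // does not fit in 32 bits at all
  return (uint64_t)(uint32_t)SExtVal; // low 32 bits, zero-extended
}

int main() {
  assert(*constantZext32Sketch(-1) == 0xFFFFFFFFu);
  assert(*constantZext32Sketch(42) == 42u);
  assert(!constantZext32Sketch(1LL << 40));
  return 0;
}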
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm = AMDGPU::getSMRDEncodedOffset(
      STI, *OffsetVal, /*IsBuffer=*/true, /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), VT, /*CheckNUW=*/true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset = AMDGPU::getSMRDEncodedOffset(
      STI, Offset, /*IsBuffer=*/true, /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());

  const auto CheckAbsNeg = [&]() {
    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
      // ... (merge the neg/abs bits from ModsTmp into Mods)
    }
  };

  // ... (match an f16-to-f32 extension and set Matched accordingly)

  return {Src, Mods};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
  }};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
bool AMDGPUInstructionSelector::selectSGetBarrierState(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(2);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(BarOp.getReg());
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  auto DstReg = I.getOperand(0).getReg();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
    return false;
  MIB.addDef(DstReg);
  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  const MachineOperand &BarOp = I.getOperand(1);
  const MachineOperand &CntOp = I.getOperand(2);

  // BarID = (BarOp >> 4) & 0x3F
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
      .add(BarOp).addImm(4u).setOperandDead(3); // Dead scc

  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
      .addReg(TmpReg0).addImm(0x3F).setOperandDead(3); // Dead scc

  // M0 = ((CntOp & 0x3F) << ShAmt) | BarID
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg2)
      .add(CntOp).addImm(0x3F).setOperandDead(3); // Dead scc

  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg3)
      .addReg(TmpReg2).addImm(ShAmt).setOperandDead(3); // Dead scc

  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg4)
      .addReg(TmpReg1).addReg(TmpReg3).setOperandDead(3); // Dead scc

  auto CopyMIB =
      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);
  constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);

  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;
  MachineInstrBuilder MIB;
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  I.eraseFromParent();
  return true;
}
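// A minimal standalone sketch (hypothetical name, not part of this file) of
// the M0 payload computed by the S_LSHR/S_AND/S_LSHL/S_OR sequence above,
// assuming the field layout implied by the constants: bits [5:0] hold the
// barrier ID (BarOp >> 4) and the member count sits at ShAmt = 16.
#include <cassert>
#include <cstdint>

static uint32_t namedBarrierInitM0Sketch(uint32_t BarOp, uint32_t Cnt) {
  uint32_t BarId = (BarOp >> 4) & 0x3F;   // S_LSHR_B32 4, then S_AND_B32 0x3F
  uint32_t CntField = (Cnt & 0x3F) << 16; // S_AND_B32 0x3F, then S_LSHL_B32 16
  return CntField | BarId;                // S_OR_B32
}

int main() {
  assert(namedBarrierInitM0Sketch(0x20, 3) == ((3u << 16) | 2u));
  return 0;
}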
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);

  if (!BarValImm) {
    // BarID = (BarOp >> 4) & 0x3F
    Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHR_B32), TmpReg0)
        .addReg(BarOp.getReg()).addImm(4u).setOperandDead(3); // Dead scc

    Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_AND_B32), TmpReg1)
        .addReg(TmpReg0).addImm(0x3F).setOperandDead(3); // Dead scc

    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(TmpReg1);
    constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
  }

  MachineInstrBuilder MIB;
  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
      return false;
    MIB.addDef(DstReg);
  }

  if (BarValImm) {
    auto BarId = ((*BarValImm) >> 4) & 0x3F;
    MIB.addImm(BarId);
  }

  I.eraseFromParent();
  return true;
}
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastFPImm(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
}

void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
}
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(OpIdx);
  MIB.addImm(Op.getImm());
}

// Each of the op_sel render helpers below asserts that it matched an
// immediate operand and then packs the relevant source/destination select
// bits into an op_sel immediate; the bodies are abridged in this listing.

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  // ...
}

// ... (two more op_sel render helpers with the same assert)
void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}
void AMDGPUInstructionSelector::renderFPPow2ToExponent(MachineInstrBuilder &MIB,
                                                       const MachineInstr &MI,
                                                       int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}
// Three source-modifier renderers follow; their signatures and some
// constants did not survive extraction, so only the recovered shape of each
// body is shown:
//
//   if (MI.getOperand(OpIdx).getImm())
//     Mods ^= ...;               // fold the matched modifier bit(s) into Mods
//   MIB.addImm((int64_t)Mods);
//
//   if (MI.getOperand(OpIdx).getImm())
//     Mods ^= ...;
//   MIB.addImm((int64_t)Mods);
//
//   unsigned Val = MI.getOperand(OpIdx).getImm();
//   ...                          // translate Val into the Mods encoding
//   MIB.addImm((int64_t)Mods);
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  uint32_t V = MI.getOperand(2).getImm();
  // ... (translate the locality operand into a cache-scope value)
  if (!Subtarget->hasSafeCUPrefetch())
    V = std::max(V, (uint32_t)AMDGPU::CPol::SCOPE_SE); // CU scope is unsafe
  MIB.addImm(V);
}
void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  unsigned Val = MI.getOperand(OpIdx).getImm();
  // ... (remap the intrinsic's scale operand to the instruction encoding)
  MIB.addImm(Val);
}
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}