#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

  const LLT Ty = MRI.getType(Reg);

  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
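
// Rewrite a copy-like intrinsic (e.g. the WQM/WWM wrappers dispatched from
// selectG_INTRINSIC below) to NewOpc and constrain source and destination to
// a common register class.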
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
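
// Select a generic COPY, giving special treatment to copies into a VCC-bank
// (condition) register: SCC sources are handled by constraining the register
// class, while non-boolean sources are materialized with an AND/compare
// sequence instead of a plain copy.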
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
              .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);

        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
      STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                      unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
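
// Select G_ADD/G_SUB. 32-bit values map directly onto scalar or vector
// add/sub opcodes; 64-bit values are split into sub0/sub1 halves, added with
// an add/addc (carry) pair, and reassembled with REG_SEQUENCE.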
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
              .add(I.getOperand(1))
              .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
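
// Select the carry-producing/consuming add and sub variants. VCC-bank carries
// use the VALU V_ADD(C)/V_SUB(B) forms; SGPR carries go through SCC with
// S_ADD(C)_U32 / S_SUB(B)_U32.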
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
      .add(I.getOperand(2))
      .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
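
// Select G_EXTRACT as a subregister copy; only 32-bit aligned offsets of
// results up to 128 bits are handled here.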
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                    *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
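
// Select G_BUILD_VECTOR(_TRUNC) of 16-bit elements: constant operands are
// folded into one 32-bit immediate, otherwise the halves are packed with
// V_AND/V_LSHL_OR_B32 on the VALU or the S_PACK_* family on the SALU.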
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
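
// Select signed/unsigned bitfield extract on the VALU; scalar and 64-bit
// vector forms are expanded earlier in regbankselect (see the asserts below).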
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();
  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =
                             STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
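
// Dispatch side-effect-free G_INTRINSICs to their dedicated selectors.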
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
  case Intrinsic::amdgcn_wave_shuffle:
    return selectWaveShuffleIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
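
// Map a compare predicate and operand size to the corresponding scalar
// S_CMP_* opcode, if one exists for this subtarget.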
                                              unsigned Size) const {
  if (!STI.hasScalarCompareEq64())

    return AMDGPU::S_CMP_LG_U64;
    return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

  if (!STI.hasSALUFloatInsts())

    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
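
// Select G_ICMP/G_FCMP: SGPR results use an S_CMP_* opcode from
// getS_CMPOpcode followed by a copy from SCC, while VCC results use the VALU
// V_CMP_* forms.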
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                               *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
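
// Select amdgcn.ballot. Constant arguments fold to 0/-1 wavemask moves;
// otherwise the source mask is ANDed into the destination, with the high half
// assembled separately when a 64-bit ballot is requested in wave32.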
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
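
// Map a GWS intrinsic ID to its DS_GWS_* machine opcode.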
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {
      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  const MCInstrDesc &InstrDesc = TII.get(Opc);

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))

      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))

      Register UndefReg = MRI->createVirtualRegister(SubRC);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

    std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

    if (!isDSOffsetLegal(PtrBase, Offset)) {
      PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;
  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
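
// Select an image (MIMG) intrinsic: compute dmask/vdata dword counts, pick
// the encoding (NSA vs. default, per generation), and emit the final MIMG
// instruction.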
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  Register ResultDef = MI.getOperand(0).getReg();
  if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                        TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
      IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);
          << "requested image instruction is not supported on this GPU\n");
                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
      Register TmpReg = MRI->createVirtualRegister(
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_wakeup_barrier: {
    if (!STI.hasSWakeupBarrier()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
        .add(I.getOperand(3))
        .add(I.getOperand(2))
        .add(I.getOperand(1));

  I.eraseFromParent();
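
// Select G_TRUNC, usually as a subregister copy; when the result needs the
// two 32-bit halves packed back together, an SDWA move or a
// shift/and/or sequence is used on the VALU.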
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());
        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
    = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    return &RBI.getRegBankFromRegClass(*RC, LLT());
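
// Select G_SEXT/G_ZEXT/G_ANYEXT/G_SEXT_INREG: any-extends become plain
// copies, VGPR extends use V_BFE_*32, and SGPR extends use S_SEXT/S_BFE with
// an IMPLICIT_DEF high half where needed.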
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      MachineInstr *ExtI =
      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

  if (PtrTy.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
      STI.ldsRequiresM0Init()) {
    MachineBasicBlock *BB = I.getParent();

    // DS instructions require M0 to be initialized before use.
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
        .addImm(-1);
  }
}

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  if (Reg.isPhysical())
    return false;

  MachineInstr &MI = *MRI.getVRegDef(Reg);
  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI);

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
           isVCmpResult(MI.getOperand(2).getReg(), MRI);

  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
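// selectG_BRCOND below chooses between two branch forms: a uniform condition
// is copied into SCC and branched on with S_CBRANCH_SCC1, while a divergent
// (VCC-bank) condition is ANDed with EXEC and branched on with
// S_CBRANCH_VCCNZ.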
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineOperand &CondOp = I.getOperand(0);
  Register CondReg = CondOp.getReg();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned BrOpcode;
  Register CondPhysReg;
  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
  } else {
    // A divergent (VCC-bank) condition is masked with EXEC before branching.
    const bool Is64 = STI.isWave64();
    const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

    Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
    BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
        .addReg(CondReg)
        .addReg(Exec);

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  }

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
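// G_GLOBAL_VALUE is selected as a plain 32-bit move of the symbol, using
// V_MOV_B32 or S_MOV_B32 depending on the register bank assigned to the
// destination.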
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  // If a half of the 64-bit mask is known to be all ones, that half of the
  // pointer can be copied through instead of masked.
  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
  const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
  const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
                   .addReg(SrcReg)
                   .addReg(MaskReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC =
      IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
    return false;

  if (Ty.getSizeInBits() == 32) {
    assert(MaskTy.getSizeInBits() == 32 &&
           "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
                     .addReg(SrcReg)
                     .addReg(MaskReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*NewOp, TII, TRI, RBI);
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  Register MaskedLo, MaskedHi;

  if (CanCopyLow32) {
    // The low half of the mask is all ones; pass the low half through.
    MaskedLo = LoReg;
  } else {
    // Extract the mask subregister and apply the AND.
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        .addReg(LoReg)
        .addReg(MaskLo);
  }

  if (CanCopyHi32) {
    MaskedHi = HiReg;
  } else {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        .addReg(HiReg)
        .addReg(MaskHi);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(MaskedLo)
      .addImm(AMDGPU::sub0)
      .addReg(MaskedHi)
      .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}
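// For dynamic vector indexing, the helper below splits an index value into a
// base register plus a constant part and maps the constant part onto a
// subregister index; an out-of-range constant falls back to subregister 0.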
static std::pair<Register, unsigned>

  std::tie(IdxBaseReg, Offset) =

  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  }

  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register IdxReg = MI.getOperand(2).getReg();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  // The index must be scalar; a divergent index is handled elsewhere.
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
    return false;
  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  MachineBasicBlock *BB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Scalar source: write the index to M0 and use S_MOVRELS.
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;

    MI.eraseFromParent();
    return true;
  }

  // Vector source without VGPR index mode: write M0 and use V_MOVRELS.
  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)

    MI.eraseFromParent();
    return true;
  }

  // Otherwise use the indirect GPRIDX pseudo, which wraps the read in VGPR
  // index mode.
  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);

  MI.eraseFromParent();
  return true;
}
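// Dynamic insertelement follows the same scheme as extractelement above:
// either write the index to M0 and use a MOVREL-style pseudo, or use VGPR
// index mode through an indirect GPRIDX pseudo when the subtarget supports
// it.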
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register VecReg = MI.getOperand(1).getReg();
  Register ValReg = MI.getOperand(2).getReg();
  Register IdxReg = MI.getOperand(3).getReg();

  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  unsigned VecSize = VecTy.getSizeInBits();
  unsigned ValSize = ValTy.getSizeInBits();

  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);

  // The index must be scalar; a divergent index is handled elsewhere.
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
    return false;

  std::tie(IdxReg, SubReg) =

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();

  MachineBasicBlock *BB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  if (!IndexMode) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);

    MI.eraseFromParent();
    return true;
  }

  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())
    return false;

  unsigned Size = MI.getOperand(3).getImm();

  // An optional vindex operand shifts the position of the remaining operands.
  const bool HasVIndex = MI.getNumOperands() == 9;
  Register VIndex;
  int OpOffset = 0;
  if (HasVIndex) {
    VIndex = MI.getOperand(4).getReg();
    OpOffset = 1;
  }

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
      getIConstantVRegValWithLookThrough(VOffset, *MRI);
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    break;
  case 2:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    break;
  case 4:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    break;
  case 12:
    if (!Subtarget->hasLDSLoadB96_B128())
      return false;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    break;
  case 16:
    if (!Subtarget->hasLDSLoadB96_B128())
      return false;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
    break;
  }
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      .add(MI.getOperand(2));

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc));

  if (HasVIndex && HasVOffset) {
    // Combine vindex and voffset into a single 64-bit vaddr operand.
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
        .addReg(VIndex)
        .addImm(AMDGPU::sub0)
        .addReg(VOffset)
        .addImm(AMDGPU::sub1);
    MIB.addReg(IdxReg);
  } else if (HasVIndex) {
    MIB.addReg(VIndex);
  } else if (HasVOffset) {
    MIB.addReg(VOffset);
  }

  MIB.add(MI.getOperand(1));            // rsrc
  MIB.add(MI.getOperand(5 + OpOffset)); // soffset
  MIB.add(MI.getOperand(6 + OpOffset)); // imm offset

  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
  MachinePointerInfo StorePtrI = LoadPtrI;

  MachineMemOperand *StoreMMO =

  MI.eraseFromParent();
  return true;
}
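// The *_LDS_BOTHEN / _IDXEN / _OFFEN / _OFFSET buffer opcodes above differ
// only in which VGPR address components they take: BOTHEN uses both a vindex
// and a voffset, IDXEN only the index, OFFEN only the offset, and OFFSET
// neither. The loaded data is written directly to LDS instead of to a VGPR
// destination.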
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();
}

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();

  if (VT->signBitIsZero(Reg))
    return matchZeroExtendFromS32(Reg);

Register
AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
  return MRI->getType(Reg) == LLT::scalar(32) ? Reg
                                              : matchZeroExtendFromS32(Reg);
}

Register
AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
  return MRI->getType(Reg) == LLT::scalar(32) ? Reg
                                              : matchSignExtendFromS32(Reg);
}

Register AMDGPUInstructionSelector::matchExtendFromS32OrS32(
    Register Reg, bool IsSigned) const {
  if (IsSigned)
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);
}

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)

  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())
    return false;

  unsigned Size = MI.getOperand(3).getImm();
  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    break;
  case 2:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    break;
  case 4:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    break;
  case 12:
    if (!Subtarget->hasLDSLoadB96_B128())
      return false;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    break;
  case 16:
    if (!Subtarget->hasLDSLoadB96_B128())
      return false;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
    break;
  }

  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

      .add(MI.getOperand(2));

  // Try to split the address into an SGPR base and a VGPR offset.
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {

    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4)); // offset

  unsigned Aux = MI.getOperand(5).getImm();

  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;

  MachineMemOperand *StoreMMO =
      MF->getMachineMemOperand(StorePtrI, MachineMemOperand::MOStore,
                               sizeof(int32_t), Align(4));

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
    MachineInstr &MI) const {
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getMF());
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
3746 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3747 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3749 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3750 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3752 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3753 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3755 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3756 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3758 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3759 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3761 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3762 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3764 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3765 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3767 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3768 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3770 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3771 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3773 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3774 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3776 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3777 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3779 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3780 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3782 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3783 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3785 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3786 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3788 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3789 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3791 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3792 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3794 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3795 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3797 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3798 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3800 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3801 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3803 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3804 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3806 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3807 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3809 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3810 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3812 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3813 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3815 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3816 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3818 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3819 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3821 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3822 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3824 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3825 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3827 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3828 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4); // VDst_In
  MI.removeOperand(1); // Intrinsic ID
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getMF());
  const MCInstrDesc &MCID = MI.getDesc();

  MI.getOperand(0).setIsEarlyClobber(true);
}
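// The smfmac intrinsics carry the accumulator both as the result and as an
// explicit VDst_In operand; selection above drops the intrinsic ID operand,
// re-appends VDst_In after the other sources, and marks the destination
// early-clobber.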
bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
    return false;
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())
    return false;

  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;

  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));

  MachineOperand &FI = MI.getOperand(4);
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  // Shift the source right by log2(wavefront size), in the VALU or the SALU
  // depending on the destination bank.
  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addReg(SrcReg);
  } else {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2());
  }

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}
3899bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
3902 MachineBasicBlock *
MBB =
MI.getParent();
3909 const LLT DstTy = MRI->getType(DstReg);
3911 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3912 const TargetRegisterClass *DstRC =
3913 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
3918 if (!Subtarget->supportsBPermute())
3922 if (Subtarget->supportsWaveWideBPermute()) {
3923 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
3924 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
3934 assert(Subtarget->isWave64());
3938 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
3939 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
3941 Register UndefExecReg = MRI->createVirtualRegister(
3942 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
3943 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
3945 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
3946 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
3954 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
3955 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
3959 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
3960 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
3968 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
3969 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
3974 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
3975 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
3978 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
3979 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
3984 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
3985 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
3992 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
3993 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
3997 Register XORReg = MRI->createVirtualRegister(DstRC);
4002 Register ANDReg = MRI->createVirtualRegister(DstRC);
4007 Register CompareReg = MRI->createVirtualRegister(
4008 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4009 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4014 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4022 MI.eraseFromParent();
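// On wave64 targets without a wave-wide bpermute, the shuffle above is built
// from two half-wave DS_BPERMUTE_B32 operations: one over the value as-is
// and one over the value swapped between wave halves with V_PERMLANE64_B32.
// A V_CMP_EQ_U32 / V_CNDMASK_B32 pair then picks whichever result came from
// the half that actually holds the requested lane.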
4031 unsigned NumOpcodes = 0;
4044 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4055 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4069 if (Src.size() == 3) {
4076 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4077 if (Src[
I] ==
LHS) {
4087 Bits = SrcBits[Src.size()];
4093 switch (
MI->getOpcode()) {
4094 case TargetOpcode::G_AND:
4095 case TargetOpcode::G_OR:
4096 case TargetOpcode::G_XOR: {
4101 if (!getOperandBits(
LHS, LHSBits) ||
4102 !getOperandBits(
RHS, RHSBits)) {
4104 return std::make_pair(0, 0);
4110 NumOpcodes +=
Op.first;
4111 LHSBits =
Op.second;
4116 NumOpcodes +=
Op.first;
4117 RHSBits =
Op.second;
4122 return std::make_pair(0, 0);
4126 switch (
MI->getOpcode()) {
4127 case TargetOpcode::G_AND:
4128 TTbl = LHSBits & RHSBits;
4130 case TargetOpcode::G_OR:
4131 TTbl = LHSBits | RHSBits;
4133 case TargetOpcode::G_XOR:
4134 TTbl = LHSBits ^ RHSBits;
4140 return std::make_pair(NumOpcodes + 1, TTbl);
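// BitOp3_Op folds a tree of G_AND/G_OR/G_XOR over at most three source
// registers into an 8-bit truth table: each source is assigned the canonical
// pattern 0xf0, 0xcc or 0xaa, and the logical operators are applied to those
// patterns directly. For example, a ^ (b & c) becomes
// 0xf0 ^ (0xcc & 0xaa) = 0xf0 ^ 0x88 = 0x78. The returned pair is the number
// of folded operations and the table value for V_BITOP3.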
4143bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4144 if (!Subtarget->hasBitOp3Insts())
4148 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4149 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4155 unsigned NumOpcodes;
4157 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4161 if (NumOpcodes < 2 || Src.empty())
4164 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4165 if (NumOpcodes == 2 && IsB32) {
4173 }
else if (NumOpcodes < 4) {
4180 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4181 if (!IsB32 && STI.hasTrue16BitInsts())
4182 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4183 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4184 unsigned CBL = STI.getConstantBusLimit(
Opc);
4185 MachineBasicBlock *
MBB =
MI.getParent();
4188 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4189 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4190 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4196 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4207 while (Src.size() < 3)
4208 Src.push_back(Src[0]);
4225 MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  Register SrcReg = MI.getOperand(0).getReg();
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  Register SP =
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();

  Register WaveAddr = getWaveAddress(DefMI);
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  if (!WaveAddr) {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2());
  }

  BuildMI(*MBB, MI, DL, TII.get(AMDGPU::COPY), SP)
      .addReg(WaveAddr);

  MI.eraseFromParent();
  return true;
}
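// Top-level selection: each generic opcode below is dispatched either to the
// imported TableGen patterns or to one of the manual select* routines above,
// depending on the case.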
4259 if (!
I.isPreISelOpcode()) {
4261 return selectCOPY(
I);
4265 switch (
I.getOpcode()) {
4266 case TargetOpcode::G_AND:
4267 case TargetOpcode::G_OR:
4268 case TargetOpcode::G_XOR:
4269 if (selectBITOP3(
I))
4273 return selectG_AND_OR_XOR(
I);
4274 case TargetOpcode::G_ADD:
4275 case TargetOpcode::G_SUB:
4276 case TargetOpcode::G_PTR_ADD:
4279 return selectG_ADD_SUB(
I);
4280 case TargetOpcode::G_UADDO:
4281 case TargetOpcode::G_USUBO:
4282 case TargetOpcode::G_UADDE:
4283 case TargetOpcode::G_USUBE:
4284 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4285 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4286 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4287 return selectG_AMDGPU_MAD_64_32(
I);
4288 case TargetOpcode::G_INTTOPTR:
4289 case TargetOpcode::G_BITCAST:
4290 case TargetOpcode::G_PTRTOINT:
4291 case TargetOpcode::G_FREEZE:
4292 return selectCOPY(
I);
4293 case TargetOpcode::G_FNEG:
4296 return selectG_FNEG(
I);
4297 case TargetOpcode::G_FABS:
4300 return selectG_FABS(
I);
4301 case TargetOpcode::G_EXTRACT:
4302 return selectG_EXTRACT(
I);
4303 case TargetOpcode::G_MERGE_VALUES:
4304 case TargetOpcode::G_CONCAT_VECTORS:
4305 return selectG_MERGE_VALUES(
I);
4306 case TargetOpcode::G_UNMERGE_VALUES:
4307 return selectG_UNMERGE_VALUES(
I);
4308 case TargetOpcode::G_BUILD_VECTOR:
4309 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4310 return selectG_BUILD_VECTOR(
I);
4311 case TargetOpcode::G_IMPLICIT_DEF:
4312 return selectG_IMPLICIT_DEF(
I);
4313 case TargetOpcode::G_INSERT:
4314 return selectG_INSERT(
I);
4315 case TargetOpcode::G_INTRINSIC:
4316 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4317 return selectG_INTRINSIC(
I);
4318 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4319 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4320 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4321 case TargetOpcode::G_ICMP:
4322 case TargetOpcode::G_FCMP:
4323 if (selectG_ICMP_or_FCMP(
I))
4326 case TargetOpcode::G_LOAD:
4327 case TargetOpcode::G_ZEXTLOAD:
4328 case TargetOpcode::G_SEXTLOAD:
4329 case TargetOpcode::G_STORE:
4330 case TargetOpcode::G_ATOMIC_CMPXCHG:
4331 case TargetOpcode::G_ATOMICRMW_XCHG:
4332 case TargetOpcode::G_ATOMICRMW_ADD:
4333 case TargetOpcode::G_ATOMICRMW_SUB:
4334 case TargetOpcode::G_ATOMICRMW_AND:
4335 case TargetOpcode::G_ATOMICRMW_OR:
4336 case TargetOpcode::G_ATOMICRMW_XOR:
4337 case TargetOpcode::G_ATOMICRMW_MIN:
4338 case TargetOpcode::G_ATOMICRMW_MAX:
4339 case TargetOpcode::G_ATOMICRMW_UMIN:
4340 case TargetOpcode::G_ATOMICRMW_UMAX:
4341 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4342 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4343 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4344 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4345 case TargetOpcode::G_ATOMICRMW_FADD:
4346 case TargetOpcode::G_ATOMICRMW_FMIN:
4347 case TargetOpcode::G_ATOMICRMW_FMAX:
4348 return selectG_LOAD_STORE_ATOMICRMW(
I);
4349 case TargetOpcode::G_SELECT:
4350 return selectG_SELECT(
I);
4351 case TargetOpcode::G_TRUNC:
4352 return selectG_TRUNC(
I);
4353 case TargetOpcode::G_SEXT:
4354 case TargetOpcode::G_ZEXT:
4355 case TargetOpcode::G_ANYEXT:
4356 case TargetOpcode::G_SEXT_INREG:
4360 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4363 return selectG_SZA_EXT(
I);
4364 case TargetOpcode::G_FPEXT:
4365 if (selectG_FPEXT(
I))
4368 case TargetOpcode::G_BRCOND:
4369 return selectG_BRCOND(
I);
4370 case TargetOpcode::G_GLOBAL_VALUE:
4371 return selectG_GLOBAL_VALUE(
I);
4372 case TargetOpcode::G_PTRMASK:
4373 return selectG_PTRMASK(
I);
4374 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4375 return selectG_EXTRACT_VECTOR_ELT(
I);
4376 case TargetOpcode::G_INSERT_VECTOR_ELT:
4377 return selectG_INSERT_VECTOR_ELT(
I);
4378 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4379 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4380 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4381 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4382 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4385 assert(Intr &&
"not an image intrinsic with image pseudo");
4386 return selectImageIntrinsic(
I, Intr);
4388 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4389 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4390 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4391 return selectBVHIntersectRayIntrinsic(
I);
4392 case AMDGPU::G_SBFX:
4393 case AMDGPU::G_UBFX:
4394 return selectG_SBFX_UBFX(
I);
4395 case AMDGPU::G_SI_CALL:
4396 I.setDesc(TII.get(AMDGPU::SI_CALL));
4398 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4399 return selectWaveAddress(
I);
4400 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4401 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4404 case AMDGPU::G_STACKRESTORE:
4405 return selectStackRestore(
I);
4407 return selectPHI(
I);
4408 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4409 return selectCOPY_SCC_VCC(
I);
4410 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4411 return selectCOPY_VCC_SCC(
I);
4412 case AMDGPU::G_AMDGPU_READANYLANE:
4413 return selectReadAnyLane(
I);
4414 case TargetOpcode::G_CONSTANT:
4415 case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}};
}

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  unsigned Mods = 0;
  MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = getDefIgnoringCopies(Src, *MRI);
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    // Treat fsub(0, x) as a negate.
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
      Mods |= SISrcMods::NEG;
    }
  }

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }

  return std::pair(Src, Mods);
}
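// VOP3 source modifiers: instead of emitting separate instructions for
// fneg/fabs, selectVOP3ModsImpl strips G_FNEG/G_FABS (and fsub from zero)
// off the operand and returns the stripped register together with NEG/ABS
// modifier bits, which the renderer lambdas below attach as an immediate
// source-modifier operand.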
4461Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4463 bool ForceVGPR)
const {
4464 if ((Mods != 0 || ForceVGPR) &&
4465 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4472 TII.
get(AMDGPU::COPY), VGPRSrc)
4484AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4486 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4491AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4494 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4497 [=](MachineInstrBuilder &MIB) {
4498 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4500 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4501 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4502 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4507AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4510 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4515 [=](MachineInstrBuilder &MIB) {
4516 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4518 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4519 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4520 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4525AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4527 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4528 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4529 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4534AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4537 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4540 [=](MachineInstrBuilder &MIB) {
4541 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4543 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4548AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4552 std::tie(Src, Mods) =
4553 selectVOP3ModsImpl(Root.
getReg(),
false);
4556 [=](MachineInstrBuilder &MIB) {
4557 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4559 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4564AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4567 std::tie(Src, Mods) =
4568 selectVOP3ModsImpl(Root.
getReg(),
true,
4572 [=](MachineInstrBuilder &MIB) {
4573 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4575 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4580AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4583 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4586 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4611 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4614 unsigned DstSize =
MRI.getType(
MI->getOperand(0).getReg()).getSizeInBits();
4615 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4616 return DstSize * 2 == SrcSize;
4622 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4626 std::optional<ValueAndVReg> ShiftAmt;
4629 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4630 unsigned Shift = ShiftAmt->Value.getZExtValue();
4631 return Shift * 2 == SrcSize;
4639 if (
MI->getOpcode() != AMDGPU::G_SHL)
4643 std::optional<ValueAndVReg> ShiftAmt;
4646 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4647 unsigned Shift = ShiftAmt->Value.getZExtValue();
4648 return Shift * 2 == SrcSize;
4656 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4658 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4659 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4829static std::optional<std::pair<Register, SrcStatus>>
4834 unsigned Opc =
MI->getOpcode();
4838 case AMDGPU::G_BITCAST:
4839 return std::optional<std::pair<Register, SrcStatus>>(
4840 {
MI->getOperand(1).getReg(), Curr.second});
4842 if (
MI->getOperand(1).getReg().isPhysical())
4843 return std::nullopt;
4844 return std::optional<std::pair<Register, SrcStatus>>(
4845 {
MI->getOperand(1).getReg(), Curr.second});
4846 case AMDGPU::G_FNEG: {
4849 return std::nullopt;
4850 return std::optional<std::pair<Register, SrcStatus>>(
4851 {
MI->getOperand(1).getReg(), Stat});
4858 switch (Curr.second) {
4861 return std::optional<std::pair<Register, SrcStatus>>(
4864 if (Curr.first ==
MI->getOperand(0).getReg())
4865 return std::optional<std::pair<Register, SrcStatus>>(
4867 return std::optional<std::pair<Register, SrcStatus>>(
4879 return std::optional<std::pair<Register, SrcStatus>>(
4883 if (Curr.first ==
MI->getOperand(0).getReg())
4884 return std::optional<std::pair<Register, SrcStatus>>(
4886 return std::optional<std::pair<Register, SrcStatus>>(
4892 return std::optional<std::pair<Register, SrcStatus>>(
4897 return std::optional<std::pair<Register, SrcStatus>>(
4902 return std::optional<std::pair<Register, SrcStatus>>(
4907 return std::optional<std::pair<Register, SrcStatus>>(
4913 return std::nullopt;
4923 bool HasNeg =
false;
4925 bool HasOpsel =
true;
4930 unsigned Opc =
MI->getOpcode();
4932 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4935 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4938 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4962 while (
Depth <= MaxDepth && Curr.has_value()) {
4965 Statlist.push_back(Curr.value());
4972static std::pair<Register, SrcStatus>
4979 while (
Depth <= MaxDepth && Curr.has_value()) {
4985 LastSameOrNeg = Curr.value();
4990 return LastSameOrNeg;
4995 unsigned Width1 =
MRI.getType(Reg1).getSizeInBits();
4996 unsigned Width2 =
MRI.getType(Reg2).getSizeInBits();
4997 return Width1 == Width2;
5033 IsHalfState(HiStat);
5036std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5042 return {RootReg, Mods};
5045 SearchOptions SO(RootReg, MRI);
5056 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5058 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5059 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5061 return {Stat.first, Mods};
5067 if (StatlistHi.
empty()) {
5069 return {Stat.first, Mods};
5075 if (StatlistLo.
empty()) {
5077 return {Stat.first, Mods};
5080 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5081 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5082 if (StatlistHi[
I].first == StatlistLo[J].first &&
5084 StatlistHi[
I].first, RootReg, TII, MRI))
5085 return {StatlistHi[
I].first,
5086 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5092 return {Stat.first, Mods};
5102 return RB->
getID() == RBNo;
5119 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI,
MRI,
TRI) ||
5124 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5130 Register DstReg =
MRI.cloneVirtualRegister(RootReg);
5133 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5141AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5146 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5150 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5151 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5156AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5158 return selectVOP3PRetHelper(Root);
5162AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5164 return selectVOP3PRetHelper(Root,
true);
5168AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5171 "expected i1 value");
5177 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5185 switch (Elts.
size()) {
5187 DstRegClass = &AMDGPU::VReg_256RegClass;
5190 DstRegClass = &AMDGPU::VReg_128RegClass;
5193 DstRegClass = &AMDGPU::VReg_64RegClass;
5200 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5201 .addDef(
MRI.createVirtualRegister(DstRegClass));
5202 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5213 if (ModOpcode == TargetOpcode::G_FNEG) {
5217 for (
auto El : Elts) {
5223 if (Elts.size() != NegAbsElts.
size()) {
5232 assert(ModOpcode == TargetOpcode::G_FABS);
5240AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5246 assert(BV->getNumSources() > 0);
5248 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5249 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5252 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5253 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5260 if (BV->getNumSources() == EltsF32.
size()) {
5266 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5267 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5271AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5277 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5285 if (CV->getNumSources() == EltsV2F16.
size()) {
5292 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5293 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5297AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5303 assert(CV->getNumSources() > 0);
5304 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5306 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5310 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5311 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5318 if (CV->getNumSources() == EltsV2F16.
size()) {
5325 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5326 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5330AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5331 std::optional<FPValueAndVReg> FPValReg;
5333 if (TII.isInlineConstant(FPValReg->Value)) {
5334 return {{[=](MachineInstrBuilder &MIB) {
5335 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5345 if (TII.isInlineConstant(ICst)) {
5355AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5361 std::optional<ValueAndVReg> ShiftAmt;
5363 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5364 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5365 Key = ShiftAmt->Value.getZExtValue() / 8;
5370 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5371 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5376AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5383 std::optional<ValueAndVReg> ShiftAmt;
5385 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5386 ShiftAmt->Value.getZExtValue() == 16) {
5392 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5393 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5398AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5405 S32 = matchAnyExtendFromS32(Src);
5409 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5414 Src =
Def->getOperand(2).getReg();
5421 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5422 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5427AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5430 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5434 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5435 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5441AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5444 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5450 [=](MachineInstrBuilder &MIB) {
5452 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5454 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5459AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5462 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5468 [=](MachineInstrBuilder &MIB) {
5470 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5472 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5479bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5481 bool IsSigned)
const {
5482 if (!Subtarget->hasScaleOffset())
5486 MachineMemOperand *MMO = *
MI.memoperands_begin();
5498 OffsetReg =
Def->Reg;
5513 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5517 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5518 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5519 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5520 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5533bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5537 bool *ScaleOffset)
const {
5539 MachineBasicBlock *
MBB =
MI->getParent();
5544 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5546 if (AddrInfo.
empty())
5549 const GEPInfo &GEPI = AddrInfo[0];
5550 std::optional<int64_t> EncodedImm;
5553 *ScaleOffset =
false;
5558 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5559 AddrInfo.
size() > 1) {
5560 const GEPInfo &GEPI2 = AddrInfo[1];
5561 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5562 Register OffsetReg = GEPI2.SgprParts[1];
5565 selectScaleOffset(Root, OffsetReg,
false );
5566 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5568 Base = GEPI2.SgprParts[0];
5569 *SOffset = OffsetReg;
5578 auto SKnown =
VT->getKnownBits(*SOffset);
5579 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5591 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5592 Base = GEPI.SgprParts[0];
5598 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5604 Base = GEPI.SgprParts[0];
5605 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5606 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5611 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5612 Register OffsetReg = GEPI.SgprParts[1];
5614 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5615 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5617 Base = GEPI.SgprParts[0];
5618 *SOffset = OffsetReg;
5627AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5630 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5632 return std::nullopt;
5634 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5635 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5639AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5641 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5643 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5644 return std::nullopt;
5646 const GEPInfo &GEPInfo = AddrInfo[0];
5647 Register PtrReg = GEPInfo.SgprParts[0];
5648 std::optional<int64_t> EncodedImm =
5651 return std::nullopt;
5654 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5655 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5660AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5663 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5665 return std::nullopt;
5668 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5669 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5670 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5674AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5678 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5679 return std::nullopt;
5682 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5683 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5685 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  if (!STI.hasFlatInstOffsets())
    return std::pair(Root.getReg(), 0);

  Register PtrBase;
  int64_t ConstOffset;
  bool IsInBounds;
  std::tie(PtrBase, ConstOffset, IsInBounds) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 ||
      (FlatVariant == SIInstrFlags::FlatScratch &&
       !isFlatScratchBaseLegal(Root.getReg())))
    return std::pair(Root.getReg(), 0);

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return std::pair(Root.getReg(), 0);

  return std::pair(PtrBase, ConstOffset);
}
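// FLAT-family addressing: selectFlatOffsetImpl peels a constant displacement
// off a G_PTR_ADD and keeps it only if the encoding for the given FLAT
// variant (flat, global, or scratch) can hold it, as checked by
// TII.isLegalFLATOffset; otherwise the unmodified pointer is used with a
// zero offset.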
5722AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5726 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5727 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5732AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5736 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5737 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5742AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5746 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5747 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5753AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5755 bool NeedIOffset)
const {
5758 int64_t ConstOffset;
5759 int64_t ImmOffset = 0;
5763 std::tie(PtrBase, ConstOffset, std::ignore) =
5764 getPtrBaseWithConstantOffset(Addr, *MRI);
5766 if (ConstOffset != 0) {
5771 ImmOffset = ConstOffset;
5774 if (isSGPR(PtrBaseDef->Reg)) {
5775 if (ConstOffset > 0) {
5781 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5783 std::tie(SplitImmOffset, RemainderOffset) =
5788 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5791 MachineBasicBlock *
MBB =
MI->getParent();
5793 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5795 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5797 .
addImm(RemainderOffset);
5801 [=](MachineInstrBuilder &MIB) {
5804 [=](MachineInstrBuilder &MIB) {
5807 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5808 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5811 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5812 [=](MachineInstrBuilder &MIB) {
5815 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5825 unsigned NumLiterals =
5826 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5827 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5828 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5829 return std::nullopt;
5836 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5841 if (isSGPR(SAddr)) {
5842 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5846 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5847 Subtarget->hasSignedGVSOffset());
5848 if (
Register VOffset = matchExtendFromS32OrS32(
5849 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5851 return {{[=](MachineInstrBuilder &MIB) {
5854 [=](MachineInstrBuilder &MIB) {
5857 [=](MachineInstrBuilder &MIB) {
5860 [=](MachineInstrBuilder &MIB) {
5864 return {{[=](MachineInstrBuilder &MIB) {
5867 [=](MachineInstrBuilder &MIB) {
5870 [=](MachineInstrBuilder &MIB) {
5880 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5881 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5882 return std::nullopt;
5887 MachineBasicBlock *
MBB =
MI->getParent();
5888 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5890 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5895 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5896 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5897 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5898 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5901 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5902 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5903 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5908AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5909 return selectGlobalSAddr(Root, 0);
5913AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5919 return selectGlobalSAddr(Root, PassedCPol);
5923AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
5929 return selectGlobalSAddr(Root, PassedCPol);
5933AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5938AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5945 return selectGlobalSAddr(Root, PassedCPol,
false);
5949AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
5956 return selectGlobalSAddr(Root, PassedCPol,
false);
5960AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5963 int64_t ConstOffset;
5964 int64_t ImmOffset = 0;
5968 std::tie(PtrBase, ConstOffset, std::ignore) =
5969 getPtrBaseWithConstantOffset(Addr, *MRI);
5971 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
5975 ImmOffset = ConstOffset;
5979 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5980 int FI = AddrDef->MI->getOperand(1).
getIndex();
5983 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5989 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5990 Register LHS = AddrDef->MI->getOperand(1).getReg();
5991 Register RHS = AddrDef->MI->getOperand(2).getReg();
5995 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5996 isSGPR(RHSDef->Reg)) {
5997 int FI = LHSDef->MI->getOperand(1).getIndex();
6001 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6003 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6011 return std::nullopt;
6014 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6015 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6020bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6022 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6028 auto VKnown =
VT->getKnownBits(VAddr);
6031 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6032 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6033 return (VMax & 3) + (
SMax & 3) >= 4;
6037AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6040 int64_t ConstOffset;
6041 int64_t ImmOffset = 0;
6045 std::tie(PtrBase, ConstOffset, std::ignore) =
6046 getPtrBaseWithConstantOffset(Addr, *MRI);
6049 if (ConstOffset != 0 &&
6053 ImmOffset = ConstOffset;
6057 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6058 return std::nullopt;
6060 Register RHS = AddrDef->MI->getOperand(2).getReg();
6061 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6062 return std::nullopt;
6064 Register LHS = AddrDef->MI->getOperand(1).getReg();
6067 if (OrigAddr != Addr) {
6068 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6069 return std::nullopt;
6071 if (!isFlatScratchBaseLegalSV(OrigAddr))
6072 return std::nullopt;
6075 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6076 return std::nullopt;
6078 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6082 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6083 int FI = LHSDef->MI->getOperand(1).getIndex();
6085 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6087 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6088 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6097 return std::nullopt;
6100 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6101 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6102 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6103 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6108AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6110 MachineBasicBlock *
MBB =
MI->getParent();
6112 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6117 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6122 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6126 return {{[=](MachineInstrBuilder &MIB) {
6129 [=](MachineInstrBuilder &MIB) {
6132 [=](MachineInstrBuilder &MIB) {
6137 [=](MachineInstrBuilder &MIB) {
6146 std::optional<int> FI;
6149 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6151 int64_t ConstOffset;
6152 std::tie(PtrBase, ConstOffset, std::ignore) =
6153 getPtrBaseWithConstantOffset(VAddr, *MRI);
6154 if (ConstOffset != 0) {
6155 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6156 (!STI.privateMemoryResourceIsRangeChecked() ||
6157 VT->signBitIsZero(PtrBase))) {
6158 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6159 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6165 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6169 return {{[=](MachineInstrBuilder &MIB) {
6172 [=](MachineInstrBuilder &MIB) {
6178 [=](MachineInstrBuilder &MIB) {
6183 [=](MachineInstrBuilder &MIB) {
6188bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6193 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6198 return VT->signBitIsZero(
Base);
6201bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6203 unsigned Size)
const {
6204 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6209 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6214 return VT->signBitIsZero(
Base);
6219 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6220 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6227bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6235 if (STI.hasSignedScratchOffsets())
6241 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6242 std::optional<ValueAndVReg> RhsValReg =
6248 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6249 RhsValReg->Value.getSExtValue() > -0x40000000)
6253 return VT->signBitIsZero(
LHS);
6258bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6266 if (STI.hasSignedScratchOffsets())
6271 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6276bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6280 if (STI.hasSignedScratchOffsets())
6285 std::optional<DefinitionAndSourceRegister> BaseDef =
6287 std::optional<ValueAndVReg> RHSOffset =
6297 (RHSOffset->Value.getSExtValue() < 0 &&
6298 RHSOffset->Value.getSExtValue() > -0x40000000)))
6301 Register LHS = BaseDef->MI->getOperand(1).getReg();
6302 Register RHS = BaseDef->MI->getOperand(2).getReg();
6303 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6306bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6307 unsigned ShAmtBits)
const {
6308 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6310 std::optional<APInt>
RHS =
6315 if (
RHS->countr_one() >= ShAmtBits)
6318 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6319 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6323AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6326 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6328 std::optional<DefinitionAndSourceRegister>
Def =
6330 assert(Def &&
"this shouldn't be an optional result");
6335 [=](MachineInstrBuilder &MIB) {
6338 [=](MachineInstrBuilder &MIB) {
6341 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6352 if (!TII.isLegalMUBUFImmOffset(
Offset))
6360 [=](MachineInstrBuilder &MIB) {
6363 [=](MachineInstrBuilder &MIB) {
6371 !TII.isLegalMUBUFImmOffset(
Offset))
6375 [=](MachineInstrBuilder &MIB) {
6378 [=](MachineInstrBuilder &MIB) {
6385std::pair<Register, unsigned>
6386AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6387 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6388 int64_t ConstAddr = 0;
6392 std::tie(PtrBase,
Offset, std::ignore) =
6393 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6396 if (isDSOffsetLegal(PtrBase,
Offset)) {
6398 return std::pair(PtrBase,
Offset);
6400 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6409 return std::pair(Root.
getReg(), 0);
6413AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6416 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6418 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6424AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6425 return selectDSReadWrite2(Root, 4);
6429AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6430 return selectDSReadWrite2(Root, 8);
6434AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6435 unsigned Size)
const {
6440 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6442 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6446std::pair<Register, unsigned>
6447AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6448 unsigned Size)
const {
6449 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6450 int64_t ConstAddr = 0;
6454 std::tie(PtrBase,
Offset, std::ignore) =
6455 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6458 int64_t OffsetValue0 =
Offset;
6460 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6462 return std::pair(PtrBase, OffsetValue0 /
Size);
6464 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6472 return std::pair(Root.
getReg(), 0);
6480std::tuple<Register, int64_t, bool>
6481AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6484 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6485 return {Root, 0,
false};
6488 std::optional<ValueAndVReg> MaybeOffset =
6491 return {Root, 0,
false};
6506 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6507 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6508 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6509 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6511 B.buildInstr(AMDGPU::S_MOV_B32)
6514 B.buildInstr(AMDGPU::S_MOV_B32)
6521 B.buildInstr(AMDGPU::REG_SEQUENCE)
6524 .addImm(AMDGPU::sub0)
6526 .addImm(AMDGPU::sub1);
6530 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6531 B.buildInstr(AMDGPU::S_MOV_B64)
6536 B.buildInstr(AMDGPU::REG_SEQUENCE)
6539 .addImm(AMDGPU::sub0_sub1)
6541 .addImm(AMDGPU::sub2_sub3);
6548 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6557 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6564AMDGPUInstructionSelector::MUBUFAddressData
6565AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
6566 MUBUFAddressData Data;
6572 std::tie(PtrBase, Offset, std::ignore) =
6573 getPtrBaseWithConstantOffset(Src, *MRI);
6579 if (MachineInstr *InputAdd
6581 Data.N2 = InputAdd->getOperand(1).getReg();
6582 Data.N3 = InputAdd->getOperand(2).getReg();
6597bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
6603 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6604 return N0Bank->getID() == AMDGPU::VGPRRegBankID;
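// Offsets that do not fit the MUBUF immediate field are split: the excess is
// materialized into an SGPR SOffset with S_MOV_B32 so only a legal immediate
// remains on the instruction.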
6610void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6612 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6616 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6617 B.buildInstr(AMDGPU::S_MOV_B32)
6623bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6628 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6631 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
6632 if (!shouldUseAddr64(AddrData))
6638 Offset = AddrData.Offset;
6644 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6646 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6659 } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6670 splitIllegalMUBUFOffset(B, SOffset, Offset);
6674bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6679 if (STI.useFlatForGlobal())
6682 MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
6683 if (shouldUseAddr64(AddrData))
6689 Offset = AddrData.Offset;
6695 splitIllegalMUBUFOffset(B, SOffset, Offset);
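// The renderer sets below fill the vaddr/srsrc/soffset/offset operands of the
// MUBUF addressing modes; on subtargets with a restricted SOffset field,
// SGPR_NULL stands in when no SGPR offset is required.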
6700AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
6706 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
6712 [=](MachineInstrBuilder &MIB) {
6715 [=](MachineInstrBuilder &MIB) {
6718 [=](MachineInstrBuilder &MIB) {
6721 else if (STI.hasRestrictedSOffset())
6722 MIB.addReg(AMDGPU::SGPR_NULL);
6726 [=](MachineInstrBuilder &MIB) {
6736AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
6741 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
6745 [=](MachineInstrBuilder &MIB) {
6748 [=](MachineInstrBuilder &MIB) {
6751 else if (STI.hasRestrictedSOffset())
6752 MIB.addReg(AMDGPU::SGPR_NULL);
6764AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
6769 SOffset = AMDGPU::SGPR_NULL;
6771 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
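// Buffer SMRD offsets are re-encoded per subtarget: the helpers below take a
// 32-bit constant root and run it through the AMDGPU::getSMRDEncodedOffset /
// getSMRDEncodedLiteralOffset32 helpers (elided here) before rendering the
// result as an immediate.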
6775static std::optional<uint64_t>
6779 if (!OffsetVal || !isInt<32>(*OffsetVal))
6780 return std::nullopt;
6781 return Lo_32(*OffsetVal);
6785AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
6786 std::optional<uint64_t> OffsetVal =
6791 std::optional<int64_t> EncodedImm =
6796 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
6800AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
6807 std::optional<int64_t> EncodedImm =
6812 return {{ [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); } }};
6816AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
6824 return std::nullopt;
6826 std::optional<int64_t> EncodedOffset =
6829 return std::nullopt;
6832 return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
6833 [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
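// Mad-mix source modifiers: selectVOP3PMadMixModsImpl first collects ordinary
// VOP3 neg/abs modifiers and then, in the elided body, looks through an
// f16-to-f32 extension to derive the op_sel bits, reporting through Matched
// whether the mix pattern was actually present.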
6836std::pair<Register, unsigned>
6837AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
6838 bool &Matched) const {
6843 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
6853 const auto CheckAbsNeg = [&]() {
6858 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6889AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6894 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6899 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6900 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6905AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
6909 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6912 [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6913 [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
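// Split-barrier intrinsics are selected by hand below; the "isfirst" signal
// form defines SCC, which the elided lines copy into the result register
// before constraining it to SReg_32_XM0_XEXEC.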
6917bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6921 Register CCReg = I.getOperand(0).getReg();
6926 BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6927 .addImm(I.getOperand(2).getImm());
6931 I.eraseFromParent();
6932 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
6936bool AMDGPUInstructionSelector::selectSGetBarrierState(
6940 const MachineOperand &BarOp = I.getOperand(2);
6941 std::optional<int64_t> BarValImm =
6945 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6949 MachineInstrBuilder MIB;
6950 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6951 : AMDGPU::S_GET_BARRIER_STATE_M0;
6954 auto DstReg = I.getOperand(0).getReg();
6955 const TargetRegisterClass *DstRC =
6956 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6957 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6963 I.eraseFromParent();
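// getNamedBarrierOp picks between the immediate and M0 forms of the barrier
// join/wakeup/get-state instructions, depending on whether the barrier id is
// available as an inline constant.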
6968 if (HasInlineConst) {
6972 case Intrinsic::amdgcn_s_barrier_join:
6973 return AMDGPU::S_BARRIER_JOIN_IMM;
6974 case Intrinsic::amdgcn_s_wakeup_barrier:
6975 return AMDGPU::S_WAKEUP_BARRIER_IMM;
6976 case Intrinsic::amdgcn_s_get_named_barrier_state:
6977 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6983 case Intrinsic::amdgcn_s_barrier_join:
6984 return AMDGPU::S_BARRIER_JOIN_M0;
6985 case Intrinsic::amdgcn_s_wakeup_barrier:
6986 return AMDGPU::S_WAKEUP_BARRIER_M0;
6987 case Intrinsic::amdgcn_s_get_named_barrier_state:
6988 return AMDGPU::S_GET_BARRIER_STATE_M0;
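// s_barrier_init / s_barrier_signal with a named barrier: the barrier id and
// the member count are masked, shifted (note the 16-bit shift for the count
// field), and combined through the SReg_32 temporaries below before the value
// is copied into M0 and the M0 form of the instruction is emitted.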
6993bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6997 const MachineOperand &BarOp = I.getOperand(1);
6998 const MachineOperand &CntOp = I.getOperand(2);
7001 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7007 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7014 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7020 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7021 constexpr unsigned ShAmt = 16;
7027 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7037 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7038 ? AMDGPU::S_BARRIER_INIT_M0
7039 : AMDGPU::S_BARRIER_SIGNAL_M0;
7040 MachineInstrBuilder MIB;
7043 I.eraseFromParent();
7047bool AMDGPUInstructionSelector::selectNamedBarrierInst(
7051 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7054 std::optional<int64_t> BarValImm =
7059 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7065 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7071 auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7076 MachineInstrBuilder MIB;
7080 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7081 auto DstReg = I.getOperand(0).getReg();
7082 const TargetRegisterClass *DstRC =
7083 TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
7084 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7090 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7094 I.eraseFromParent();
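// The render* hooks below are called from the generated matcher tables to turn
// matched G_CONSTANT / G_FCONSTANT defs (OpIdx == -1) or plain immediate
// operands (OpIdx >= 0) into the target-specific immediates the selected
// instruction expects.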
7101 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7102 "Expected G_CONSTANT");
7103 MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
7109 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7110 "Expected G_CONSTANT");
7111 MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
7117 const MachineOperand &Op = MI.getOperand(1);
7118 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
7119 MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7125 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7126 "Expected G_CONSTANT");
7127 MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
7135 const MachineOperand &Op = MI.getOperand(OpIdx);
7152 assert(OpIdx >= 0 && "expected to match an immediate operand");
7156void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7158 assert(OpIdx >= 0 && "expected to match an immediate operand");
7163void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7165 assert(OpIdx >= 0 && "expected to match an immediate operand");
7171void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7173 assert(OpIdx >= 0 && "expected to match an immediate operand");
7178void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7180 assert(OpIdx >= 0 && "expected to match an immediate operand");
7186void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7188 assert(OpIdx >= 0 && "expected to match an immediate operand");
7193void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7195 assert(OpIdx >= 0 && "expected to match an immediate operand");
7200void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7202 assert(OpIdx >= 0 && "expected to match an immediate operand");
7207void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7209 assert(OpIdx >= 0 && "expected to match an immediate operand");
7218 assert(OpIdx >= 0 && "expected to match an immediate operand");
7227 assert(OpIdx >= 0 && "expected to match an immediate operand");
7234void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7236 assert(OpIdx >= 0 && "expected to match an immediate operand");
7237 const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
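// Presumably renders a power-of-two floating-point constant as its exponent;
// the matcher guarantees an exact power of two, hence the assert on the
// sentinel value below.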
7252 const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7254 assert(ExpVal != INT_MIN);
7272 if (MI.getOperand(OpIdx).getImm())
7274 MIB.addImm((int64_t)Mods);
7281 if (MI.getOperand(OpIdx).getImm())
7283 MIB.addImm((int64_t)Mods);
7289 unsigned Val = MI.getOperand(OpIdx).getImm();
7297 MIB.addImm((int64_t)Mods);
7303 uint32_t V = MI.getOperand(2).getImm();
7306 if (!Subtarget->hasSafeCUPrefetch())
7312void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7314 unsigned Val = MI.getOperand(OpIdx).getImm();
7323bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7324 return TII.isInlineConstant(Imm);
7327bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7328 return TII.isInlineConstant(Imm);