#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
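// Rewrite a copy-like intrinsic to the given target opcode and constrain its
// source and destination to a common register class.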
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC
    = TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
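// Lower a generic COPY. Copies into a VCC-bank condition register from a
// non-VCC source are materialized as a compare against zero; other copies are
// kept as plain COPYs with their operands constrained to register classes.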
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC
        = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {
      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC
        = TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
          .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        assert(Subtarget->useRealTrue16Insts());
        const int64_t NoMods = 0;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
        bool IsSGPR = TRI.isSGPRClass(SrcRC);
        unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);
    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
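// Copy a wave-mask (VCC bank) value into an SCC-style SGPR boolean by testing
// whether the mask is non-zero.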
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {
  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();
  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
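// Copy an SCC-style boolean into a VCC-bank register: constant inputs become
// an immediate wave-mask move, otherwise the live SCC bit drives S_CSELECT.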
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  std::optional<ValueAndVReg> Arg =
    const int64_t Value = Arg->Value.getZExtValue();
    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
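// Select a G_PHI: pick a register class for the result from its bank and type,
// constrain every incoming value, and rewrite the instruction to a target PHI.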
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =
    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
      const LLT SrcTy = MRI->getType(SrcReg);
      const TargetRegisterClass *SrcRC =
          TRI.getRegClassForTypeOnBank(SrcTy, *RB);
      if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                                         unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
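// Select integer add/sub. 32-bit cases map directly onto SALU or VALU add/sub
// instructions; 64-bit cases are split into low/high halves joined by a carry
// chain and recombined with REG_SEQUENCE.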
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
    = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
    = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
    MachineInstr *Addc = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
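// Select overflow-aware add/sub (G_UADDO/G_USUBO/G_UADDE/G_USUBE). VCC-bank
// carries map directly onto the VALU carry instructions; SGPR carries go
// through SCC using S_ADD/S_SUB and their carry-in variants.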
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst = BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
                       .add(I.getOperand(2))
                       .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
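// Select the 64-bit multiply-add pseudo (G_AMDGPU_MAD_U64_U32 / _I64_I32),
// choosing between the gfx11 intra-forwarding-bug variant, the no-carry
// variant when the carry-out is unused, and the default V_MAD_*64_*32 form.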
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
  I.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                               *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
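// Select G_MERGE_VALUES as a REG_SEQUENCE over the constrained source
// registers.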
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC
      = TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
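// Select G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC of 16-bit elements. Constant
// pairs fold into a single 32-bit move; an undef high half becomes a COPY;
// otherwise the halves are packed with V_AND/V_LSHL_OR on the VALU or the
// S_PACK_* / S_LSHR forms on the SALU.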
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();
  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =
                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
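// Dispatch side-effect-free G_INTRINSIC calls to the dedicated selection
// helpers.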
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
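// Map a compare predicate onto the VALU V_CMP opcode, picking between the
// 16-bit (true16 / fake16), 32-bit, and 64-bit encodings based on operand
// size and subtarget support.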
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,
      return ST.hasTrue16BitInsts()
                 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
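// Map a compare predicate onto the scalar S_CMP opcode for SGPR compares;
// 64-bit integer compares are limited to equality tests, and half-precision
// compares require SALU float support.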
                                              unsigned Size) const {
    if (!STI.hasScalarCompareEq64())

      return AMDGPU::S_CMP_LG_U64;
      return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

  if (!STI.hasSALUFloatInsts())

    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
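// Select G_ICMP / G_FCMP: SGPR results use an S_CMP plus a copy from SCC,
// while VCC results use the corresponding V_CMP.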
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))
    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                                 *TRI.getBoolRC(), *MRI);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();
      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  MachineOperand &Dst = I.getOperand(0);
  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC
    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
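// Map a ds_gws_* intrinsic ID onto its DS_GWS_* machine opcode.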
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {
      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  const MCInstrDesc &InstrDesc = TII.get(Opc);

    int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
    const TargetRegisterClass *SubRC =
        TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);

      if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))

      Register DataReg = MRI->createVirtualRegister(DataRC);
      if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))

      Register UndefReg = MRI->createVirtualRegister(SubRC);

  MI.eraseFromParent();
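// Select ds_append / ds_consume, folding a legal immediate offset off the
// pointer and constraining the base pointer to an SGPR so it can feed M0.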
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

    std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

    if (!isDSOffsetLegal(PtrBase, Offset)) {
      PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getMF();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;
  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                        TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);
                 << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);
                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

            STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
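// Select the ds_bvh_stack_* intrinsics onto the DS_BVH_STACK_* return-value
// instructions.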
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
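// Select G_TRUNC. Most truncations reduce to a subregister copy; truncating
// two 32-bit values into a v2s16 packs the halves with SDWA or a
// shift/and/or sequence.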
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;
    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
    if (!SrcRC || !DstRC)

    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
        !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

    if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
      assert(STI.useRealTrue16Insts());
          .addReg(SrcReg, 0, AMDGPU::lo16);
      I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
        BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC
    = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;

const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
  return &RBI.getRegBankFromRegClass(*RC, LLT());
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      MachineInstr *ExtI =
      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);
  if (Mask[0] == 1 && Mask[1] <= 1) {
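// Select G_FPEXT from the high half of a v2f16 as S_CVT_HI_F32_F16 when the
// subtarget has SALU float instructions and the result lives in an SGPR.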
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
    .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
    .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());
      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
      STI.ldsRequiresM0Init()) {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())

  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)

    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  MachineOperand &CondOp = I.getOperand(0);

  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {
      CondPhysReg = AMDGPU::SCC;
      BrOpcode = AMDGPU::S_CBRANCH_SCC1;
      ConstrainRC = &AMDGPU::SReg_32RegClass;

      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
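// selectG_PTRMASK: a 64-bit SGPR ptrmask can be a single S_AND_B64. Otherwise
// the pointer is split into 32-bit halves; a half whose mask bits are all
// known ones is copied through unchanged, only the remaining half is ANDed
// with its half of the mask, and the pieces are rejoined with REG_SEQUENCE.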
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC =
      IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
         "ptrmask should have been narrowed during legalize");
  auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
  I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
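// Helper for indirect vector element access: fold a constant offset on the
// index into the chosen subregister when it stays in range, returning the
// remaining dynamic index register together with that subregister.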
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
  MachineBasicBlock *BB = MI.getParent();
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC), true);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
  const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
  const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
      !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
      !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();
  MachineBasicBlock *BB = MI.getParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    MI.eraseFromParent();
  const MCInstrDesc &GPRIDXDesc =
      TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
  MI.eraseFromParent();
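// selectBufferLoadLds: lowers buffer-load-to-LDS intrinsics. The MUBUF opcode
// is picked from the transfer size (1, 2, 4, and 12/16 bytes when
// hasLDSLoadB96_B128()) and from which of vindex/voffset are present
// (BOTHEN / IDXEN / OFFEN / OFFSET variants); memory operands are rebuilt so
// the new instruction describes both the buffer load and the LDS store.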
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  MachinePointerInfo StorePtrI = LoadPtrI;
  MachineMemOperand *StoreMMO =
  MI.eraseFromParent();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  return Def->getOperand(1).getReg();
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  return Def->getOperand(1).getReg();
  if (VT->signBitIsZero(Reg))
    return matchZeroExtendFromS32(Reg);
AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
                            : matchZeroExtendFromS32(Reg);
AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
                            : matchSignExtendFromS32(Reg);
AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
                                                   bool IsSigned) const {
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  return Def->getOperand(1).getReg();
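// selectGlobalLoadLds: picks the GLOBAL_LOAD_LDS_* opcode from the transfer
// size and tries to put the address into SADDR + 32-bit VOFFSET form when the
// base (or a zero-extended offset added to an SGPR base) is uniform,
// otherwise falling back to the plain VGPR address form.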
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  if (!Subtarget->hasVMemToLDSLoad())
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
    if (!Subtarget->hasLDSLoadB96_B128())
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
  MachineBasicBlock *MBB = MI.getParent();
      .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        if (Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4));
  unsigned Aux = MI.getOperand(5).getImm();
  MachineMemOperand *LoadMMO = *MI.memoperands_begin();
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  MachinePointerInfo StorePtrI = LoadPtrI;
  MachineMemOperand *StoreMMO =
      sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getMF());
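// selectSMFMACIntrin: maps each amdgcn_smfmac_* intrinsic to its
// V_SMFMAC_*_e64 pseudo, rewrites the operand list so the accumulator input
// becomes the VDst_In operand, and marks the destination early-clobber.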
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getMF());
  const MCInstrDesc &MCID = MI.getDesc();
  MI.getOperand(0).setIsEarlyClobber(true);
bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
  if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
      !Subtarget->hasPermlane16Swap())
  if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
      !Subtarget->hasPermlane32Swap())
  unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
                        ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
                        : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
  MI.removeOperand(2);
  MI.setDesc(TII.get(Opcode));
  MachineOperand &FI = MI.getOperand(4);
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addImm(Subtarget->getWavefrontSizeLog2())
  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
  MI.eraseFromParent();
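// BITOP3 matching: the helper below walks a tree of G_AND/G_OR/G_XOR and
// folds it into an 8-bit truth table over at most three distinct sources,
// using the canonical per-source bit patterns 0xf0, 0xcc and 0xaa;
// selectBITOP3 then emits a single V_BITOP3_B32/B16 when the tree contains
// at least two logic operations.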
  unsigned NumOpcodes = 0;
  const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
  for (unsigned I = 0; I < Src.size(); ++I) {
  if (Src.size() == 3) {
    for (unsigned I = 0; I < Src.size(); ++I) {
      if (Src[I] == LHS) {
  Bits = SrcBits[Src.size()];
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    if (!getOperandBits(LHS, LHSBits) ||
        !getOperandBits(RHS, RHSBits)) {
      return std::make_pair(0, 0);
    NumOpcodes += Op.first;
    LHSBits = Op.second;
    NumOpcodes += Op.first;
    RHSBits = Op.second;
    return std::make_pair(0, 0);
  switch (MI->getOpcode()) {
  case TargetOpcode::G_AND:
    TTbl = LHSBits & RHSBits;
  case TargetOpcode::G_OR:
    TTbl = LHSBits | RHSBits;
  case TargetOpcode::G_XOR:
    TTbl = LHSBits ^ RHSBits;
  return std::make_pair(NumOpcodes + 1, TTbl);
bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
  if (!Subtarget->hasBitOp3Insts())
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  unsigned NumOpcodes;
  std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);
  if (NumOpcodes < 2 || Src.empty())
  const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
  if (NumOpcodes == 2 && IsB32) {
  } else if (NumOpcodes < 4) {
  unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
  if (!IsB32 && STI.hasTrue16BitInsts())
    Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
                                   : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
  unsigned CBL = STI.getConstantBusLimit(Opc);
  MachineBasicBlock *MBB = MI.getParent();
  for (unsigned I = 0; I < Src.size(); ++I) {
    const RegisterBank *RB = RBI.getRegBank(Src[I], *MRI, TRI);
    if (RB->getID() != AMDGPU::SGPRRegBankID)
    Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  while (Src.size() < 3)
    Src.push_back(Src[0]);
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
  MachineBasicBlock *MBB = MI.getParent();
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
        .addImm(Subtarget->getWavefrontSizeLog2())
  MI.eraseFromParent();
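// select(): top-level entry point of the selector. Instructions that are not
// pre-isel generic opcodes are treated as already selected (copies are still
// constrained), and the remaining generic opcodes are dispatched to the
// manual selectG_* helpers above, with the TableGen-imported patterns tried
// where the cases fall through to them.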
  if (!I.isPreISelOpcode()) {
    return selectCOPY(I);
  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectBITOP3(I))
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_USUB_COND:
  case TargetOpcode::G_ATOMICRMW_USUB_SAT:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
  case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
    return selectBVHIntersectRayIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
    I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
    return selectCOPY_SCC_VCC(I);
  case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
    return selectCOPY_VCC_SCC(I);
  case AMDGPU::G_AMDGPU_READANYLANE:
    return selectReadAnyLane(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
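// copyToVGPRIfSrcFolded: when source modifiers were folded into an operand
// that currently lives outside the VGPR bank, insert a COPY into a VGPR
// (or always, when ForceVGPR is set), since such modifier-carrying operands
// generally need to be vector registers.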
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
      RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
                        TII.get(AMDGPU::COPY), VGPRSrc)
4345AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4347 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4352AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4355 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4358 [=](MachineInstrBuilder &MIB) {
4359 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4361 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4362 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4363 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4368AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4371 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4376 [=](MachineInstrBuilder &MIB) {
4377 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4379 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4380 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4381 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4386AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4388 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4389 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4390 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4395AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4398 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4401 [=](MachineInstrBuilder &MIB) {
4402 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4404 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4409AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4413 std::tie(Src, Mods) =
4414 selectVOP3ModsImpl(Root.
getReg(),
false);
4417 [=](MachineInstrBuilder &MIB) {
4418 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4420 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4425AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4428 std::tie(Src, Mods) =
4429 selectVOP3ModsImpl(Root.
getReg(),
true,
4433 [=](MachineInstrBuilder &MIB) {
4434 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4436 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4441AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4444 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4447 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4472 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4475 unsigned DstSize =
MRI.getType(
MI->getOperand(0).getReg()).getSizeInBits();
4476 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4477 return DstSize * 2 == SrcSize;
4483 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4487 std::optional<ValueAndVReg> ShiftAmt;
4490 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4491 unsigned Shift = ShiftAmt->Value.getZExtValue();
4492 return Shift * 2 == SrcSize;
4500 if (
MI->getOpcode() != AMDGPU::G_SHL)
4504 std::optional<ValueAndVReg> ShiftAmt;
4507 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4508 unsigned Shift = ShiftAmt->Value.getZExtValue();
4509 return Shift * 2 == SrcSize;
4517 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4519 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4520 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4690static std::optional<std::pair<Register, SrcStatus>>
4695 unsigned Opc =
MI->getOpcode();
4699 case AMDGPU::G_BITCAST:
4700 return std::optional<std::pair<Register, SrcStatus>>(
4701 {
MI->getOperand(1).getReg(), Curr.second});
4703 if (
MI->getOperand(1).getReg().isPhysical())
4704 return std::nullopt;
4705 return std::optional<std::pair<Register, SrcStatus>>(
4706 {
MI->getOperand(1).getReg(), Curr.second});
4707 case AMDGPU::G_FNEG: {
4710 return std::nullopt;
4711 return std::optional<std::pair<Register, SrcStatus>>(
4712 {
MI->getOperand(1).getReg(), Stat});
4719 switch (Curr.second) {
4722 return std::optional<std::pair<Register, SrcStatus>>(
4725 if (Curr.first ==
MI->getOperand(0).getReg())
4726 return std::optional<std::pair<Register, SrcStatus>>(
4728 return std::optional<std::pair<Register, SrcStatus>>(
4740 return std::optional<std::pair<Register, SrcStatus>>(
4744 if (Curr.first ==
MI->getOperand(0).getReg())
4745 return std::optional<std::pair<Register, SrcStatus>>(
4747 return std::optional<std::pair<Register, SrcStatus>>(
4753 return std::optional<std::pair<Register, SrcStatus>>(
4758 return std::optional<std::pair<Register, SrcStatus>>(
4763 return std::optional<std::pair<Register, SrcStatus>>(
4768 return std::optional<std::pair<Register, SrcStatus>>(
4774 return std::nullopt;
4784 bool HasNeg =
false;
4786 bool HasOpsel =
true;
4791 unsigned Opc =
MI->getOpcode();
4793 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4796 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4799 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4823 while (
Depth <= MaxDepth && Curr.has_value()) {
4826 Statlist.push_back(Curr.value());
4833static std::pair<Register, SrcStatus>
4840 while (
Depth <= MaxDepth && Curr.has_value()) {
4846 LastSameOrNeg = Curr.value();
4851 return LastSameOrNeg;
4856 unsigned Width1 =
MRI.getType(Reg1).getSizeInBits();
4857 unsigned Width2 =
MRI.getType(Reg2).getSizeInBits();
4858 return Width1 == Width2;
4894 IsHalfState(HiStat);
4897std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
4903 return {RootReg, Mods};
4906 SearchOptions SO(RootReg, MRI);
4917 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
4919 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
4920 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
4922 return {Stat.first, Mods};
4928 if (StatlistHi.
empty()) {
4930 return {Stat.first, Mods};
4936 if (StatlistLo.
empty()) {
4938 return {Stat.first, Mods};
4941 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
4942 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
4943 if (StatlistHi[
I].first == StatlistLo[J].first &&
4945 StatlistHi[
I].first, RootReg, TII, MRI))
4946 return {StatlistHi[
I].first,
4947 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
4953 return {Stat.first, Mods};
4963 return RB->
getID() == RBNo;
4980 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI,
MRI,
TRI) ||
4985 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
4991 Register DstReg =
MRI.cloneVirtualRegister(RootReg);
4994 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5002AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5007 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5011 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5012 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5017AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5019 return selectVOP3PRetHelper(Root);
5023AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5025 return selectVOP3PRetHelper(Root,
true);
5029AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5032 "expected i1 value");
5038 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5046 switch (Elts.
size()) {
5048 DstRegClass = &AMDGPU::VReg_256RegClass;
5051 DstRegClass = &AMDGPU::VReg_128RegClass;
5054 DstRegClass = &AMDGPU::VReg_64RegClass;
5061 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5062 .addDef(
MRI.createVirtualRegister(DstRegClass));
5063 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5074 if (ModOpcode == TargetOpcode::G_FNEG) {
5078 for (
auto El : Elts) {
5084 if (Elts.size() != NegAbsElts.
size()) {
5093 assert(ModOpcode == TargetOpcode::G_FABS);
5101AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5107 assert(BV->getNumSources() > 0);
5109 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5110 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5113 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5114 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5121 if (BV->getNumSources() == EltsF32.
size()) {
5127 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5128 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5132AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5138 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5146 if (CV->getNumSources() == EltsV2F16.
size()) {
5153 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5154 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5158AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5164 assert(CV->getNumSources() > 0);
5165 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5167 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5171 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5172 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5179 if (CV->getNumSources() == EltsV2F16.
size()) {
5186 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5187 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5191AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5192 std::optional<FPValueAndVReg> FPValReg;
5194 if (TII.isInlineConstant(FPValReg->Value)) {
5195 return {{[=](MachineInstrBuilder &MIB) {
5196 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5206 if (TII.isInlineConstant(ICst)) {
5216AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5222 std::optional<ValueAndVReg> ShiftAmt;
5224 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5225 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5226 Key = ShiftAmt->Value.getZExtValue() / 8;
5231 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5232 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5237AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5244 std::optional<ValueAndVReg> ShiftAmt;
5246 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5247 ShiftAmt->Value.getZExtValue() == 16) {
5253 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5254 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5259AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5266 S32 = matchAnyExtendFromS32(Src);
5270 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5275 Src =
Def->getOperand(2).getReg();
5282 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5283 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5288AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5291 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5295 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5296 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5302AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5305 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5311 [=](MachineInstrBuilder &MIB) {
5313 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5315 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5320AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5323 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5329 [=](MachineInstrBuilder &MIB) {
5331 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5333 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5340bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5342 bool IsSigned)
const {
5343 if (!Subtarget->hasScaleOffset())
5347 MachineMemOperand *MMO = *
MI.memoperands_begin();
5359 OffsetReg =
Def->Reg;
5374 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5378 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5379 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5380 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5381 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5394bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5398 bool *ScaleOffset)
const {
5400 MachineBasicBlock *
MBB =
MI->getParent();
5405 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5407 if (AddrInfo.
empty())
5410 const GEPInfo &GEPI = AddrInfo[0];
5411 std::optional<int64_t> EncodedImm;
5414 *ScaleOffset =
false;
5419 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5420 AddrInfo.
size() > 1) {
5421 const GEPInfo &GEPI2 = AddrInfo[1];
5422 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5423 Register OffsetReg = GEPI2.SgprParts[1];
5426 selectScaleOffset(Root, OffsetReg,
false );
5427 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5429 Base = GEPI2.SgprParts[0];
5430 *SOffset = OffsetReg;
5439 auto SKnown =
VT->getKnownBits(*SOffset);
5440 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5452 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5453 Base = GEPI.SgprParts[0];
5459 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5465 Base = GEPI.SgprParts[0];
5466 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5467 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5472 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5473 Register OffsetReg = GEPI.SgprParts[1];
5475 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5476 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5478 Base = GEPI.SgprParts[0];
5479 *SOffset = OffsetReg;
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset,
    return std::nullopt;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
5500AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5502 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5504 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5505 return std::nullopt;
5507 const GEPInfo &GEPInfo = AddrInfo[0];
5508 Register PtrReg = GEPInfo.SgprParts[0];
5509 std::optional<int64_t> EncodedImm =
5512 return std::nullopt;
5515 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5516 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5521AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5524 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5526 return std::nullopt;
5529 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5530 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5531 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5535AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5539 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5540 return std::nullopt;
5543 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5544 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5546 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  if (!STI.hasFlatInstOffsets())
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset, IsInBounds) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
  if (ConstOffset == 0 ||
       !isFlatScratchBaseLegal(Root.getReg())) ||
  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
  return std::pair(PtrBase, ConstOffset);
5583AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5587 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5588 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5593AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5597 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5598 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5603AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5607 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5608 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5614AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5616 bool NeedIOffset)
const {
5619 int64_t ConstOffset;
5620 int64_t ImmOffset = 0;
5624 std::tie(PtrBase, ConstOffset, std::ignore) =
5625 getPtrBaseWithConstantOffset(Addr, *MRI);
5627 if (ConstOffset != 0) {
5632 ImmOffset = ConstOffset;
5635 if (isSGPR(PtrBaseDef->Reg)) {
5636 if (ConstOffset > 0) {
5642 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5644 std::tie(SplitImmOffset, RemainderOffset) =
5649 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5652 MachineBasicBlock *
MBB =
MI->getParent();
5654 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5656 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5658 .
addImm(RemainderOffset);
5662 [=](MachineInstrBuilder &MIB) {
5665 [=](MachineInstrBuilder &MIB) {
5668 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5669 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5672 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5673 [=](MachineInstrBuilder &MIB) {
5676 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5686 unsigned NumLiterals =
5687 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5688 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5689 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5690 return std::nullopt;
5697 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5702 if (isSGPR(SAddr)) {
5703 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5707 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5708 Subtarget->hasSignedGVSOffset());
5709 if (
Register VOffset = matchExtendFromS32OrS32(
5710 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5712 return {{[=](MachineInstrBuilder &MIB) {
5715 [=](MachineInstrBuilder &MIB) {
5718 [=](MachineInstrBuilder &MIB) {
5721 [=](MachineInstrBuilder &MIB) {
5725 return {{[=](MachineInstrBuilder &MIB) {
5728 [=](MachineInstrBuilder &MIB) {
5731 [=](MachineInstrBuilder &MIB) {
5741 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5742 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5743 return std::nullopt;
5748 MachineBasicBlock *
MBB =
MI->getParent();
5749 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5751 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5756 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5757 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5758 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5759 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5762 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5763 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5764 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5769AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5770 return selectGlobalSAddr(Root, 0);
5774AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5780 return selectGlobalSAddr(Root, PassedCPol);
5784AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
5790 return selectGlobalSAddr(Root, PassedCPol);
5794AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5799AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5806 return selectGlobalSAddr(Root, PassedCPol,
false);
5810AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
5817 return selectGlobalSAddr(Root, PassedCPol,
false);
5821AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5824 int64_t ConstOffset;
5825 int64_t ImmOffset = 0;
5829 std::tie(PtrBase, ConstOffset, std::ignore) =
5830 getPtrBaseWithConstantOffset(Addr, *MRI);
5832 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
5836 ImmOffset = ConstOffset;
5840 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5841 int FI = AddrDef->MI->getOperand(1).
getIndex();
5844 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5850 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5851 Register LHS = AddrDef->MI->getOperand(1).getReg();
5852 Register RHS = AddrDef->MI->getOperand(2).getReg();
5856 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5857 isSGPR(RHSDef->Reg)) {
5858 int FI = LHSDef->MI->getOperand(1).getIndex();
5862 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5864 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
5872 return std::nullopt;
5875 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
5876 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5881bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5883 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
5889 auto VKnown =
VT->getKnownBits(VAddr);
5892 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5893 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
5894 return (VMax & 3) + (
SMax & 3) >= 4;
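// selectScratchSVAddr: matches the SGPR+VGPR scratch addressing mode. It
// requires a G_PTR_ADD whose right-hand side is in a VGPR, checks the
// flat-scratch base legality rules and the SVS swizzle bug workaround, and
// folds any encodable constant offset into the immediate field.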
5898AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5901 int64_t ConstOffset;
5902 int64_t ImmOffset = 0;
5906 std::tie(PtrBase, ConstOffset, std::ignore) =
5907 getPtrBaseWithConstantOffset(Addr, *MRI);
5910 if (ConstOffset != 0 &&
5914 ImmOffset = ConstOffset;
5918 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5919 return std::nullopt;
5921 Register RHS = AddrDef->MI->getOperand(2).getReg();
5922 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
5923 return std::nullopt;
5925 Register LHS = AddrDef->MI->getOperand(1).getReg();
5928 if (OrigAddr != Addr) {
5929 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5930 return std::nullopt;
5932 if (!isFlatScratchBaseLegalSV(OrigAddr))
5933 return std::nullopt;
5936 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
5937 return std::nullopt;
5939 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
5943 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5944 int FI = LHSDef->MI->getOperand(1).getIndex();
5946 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5948 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5949 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5958 return std::nullopt;
5961 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5962 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
5963 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5964 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5969AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5971 MachineBasicBlock *
MBB =
MI->getParent();
5973 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
5978 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5983 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5987 return {{[=](MachineInstrBuilder &MIB) {
5990 [=](MachineInstrBuilder &MIB) {
5993 [=](MachineInstrBuilder &MIB) {
5998 [=](MachineInstrBuilder &MIB) {
6007 std::optional<int> FI;
6010 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6012 int64_t ConstOffset;
6013 std::tie(PtrBase, ConstOffset, std::ignore) =
6014 getPtrBaseWithConstantOffset(VAddr, *MRI);
6015 if (ConstOffset != 0) {
6016 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6017 (!STI.privateMemoryResourceIsRangeChecked() ||
6018 VT->signBitIsZero(PtrBase))) {
6019 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6020 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6026 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6030 return {{[=](MachineInstrBuilder &MIB) {
6033 [=](MachineInstrBuilder &MIB) {
6039 [=](MachineInstrBuilder &MIB) {
6044 [=](MachineInstrBuilder &MIB) {
6049bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6054 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6059 return VT->signBitIsZero(
Base);
6062bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6064 unsigned Size)
const {
6065 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6070 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6075 return VT->signBitIsZero(
Base);
6080 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6081 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6088bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6096 if (STI.hasSignedScratchOffsets())
6102 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6103 std::optional<ValueAndVReg> RhsValReg =
6109 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6110 RhsValReg->Value.getSExtValue() > -0x40000000)
6114 return VT->signBitIsZero(
LHS);
6119bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6127 if (STI.hasSignedScratchOffsets())
6132 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6137bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6141 if (STI.hasSignedScratchOffsets())
6146 std::optional<DefinitionAndSourceRegister> BaseDef =
6148 std::optional<ValueAndVReg> RHSOffset =
6158 (RHSOffset->Value.getSExtValue() < 0 &&
6159 RHSOffset->Value.getSExtValue() > -0x40000000)))
6162 Register LHS = BaseDef->MI->getOperand(1).getReg();
6163 Register RHS = BaseDef->MI->getOperand(2).getReg();
6164 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6167bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6168 unsigned ShAmtBits)
const {
6169 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6171 std::optional<APInt>
RHS =
6176 if (
RHS->countr_one() >= ShAmtBits)
6179 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6180 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6184AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6187 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6189 std::optional<DefinitionAndSourceRegister>
Def =
6191 assert(Def &&
"this shouldn't be an optional result");
6196 [=](MachineInstrBuilder &MIB) {
6199 [=](MachineInstrBuilder &MIB) {
6202 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6213 if (!TII.isLegalMUBUFImmOffset(
Offset))
6221 [=](MachineInstrBuilder &MIB) {
6224 [=](MachineInstrBuilder &MIB) {
6232 !TII.isLegalMUBUFImmOffset(
Offset))
6236 [=](MachineInstrBuilder &MIB) {
6239 [=](MachineInstrBuilder &MIB) {
6246std::pair<Register, unsigned>
6247AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6248 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6249 int64_t ConstAddr = 0;
6253 std::tie(PtrBase,
Offset, std::ignore) =
6254 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6257 if (isDSOffsetLegal(PtrBase,
Offset)) {
6259 return std::pair(PtrBase,
Offset);
6261 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6270 return std::pair(Root.
getReg(), 0);
6274AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6277 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6279 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6285AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6286 return selectDSReadWrite2(Root, 4);
6290AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6291 return selectDSReadWrite2(Root, 8);
6295AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6296 unsigned Size)
const {
6301 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6303 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6307std::pair<Register, unsigned>
6308AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6309 unsigned Size)
const {
6310 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6311 int64_t ConstAddr = 0;
6315 std::tie(PtrBase,
Offset, std::ignore) =
6316 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6319 int64_t OffsetValue0 =
Offset;
6321 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6323 return std::pair(PtrBase, OffsetValue0 /
Size);
6325 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6333 return std::pair(Root.
getReg(), 0);
std::tuple<Register, int64_t, bool>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0, false};
  std::optional<ValueAndVReg> MaybeOffset =
    return {Root, 0, false};
6367 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6368 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6369 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6370 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6372 B.buildInstr(AMDGPU::S_MOV_B32)
6375 B.buildInstr(AMDGPU::S_MOV_B32)
6382 B.buildInstr(AMDGPU::REG_SEQUENCE)
6385 .addImm(AMDGPU::sub0)
6387 .addImm(AMDGPU::sub1);
6391 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6392 B.buildInstr(AMDGPU::S_MOV_B64)
6397 B.buildInstr(AMDGPU::REG_SEQUENCE)
6400 .addImm(AMDGPU::sub0_sub1)
6402 .addImm(AMDGPU::sub2_sub3);
6409 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6418 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6425AMDGPUInstructionSelector::MUBUFAddressData
6426AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6427 MUBUFAddressData
Data;
6433 std::tie(PtrBase,
Offset, std::ignore) =
6434 getPtrBaseWithConstantOffset(Src, *MRI);
6440 if (MachineInstr *InputAdd
6442 Data.N2 = InputAdd->getOperand(1).getReg();
6443 Data.N3 = InputAdd->getOperand(2).getReg();
6458bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6464 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6465 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6471void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6473 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6477 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6478 B.buildInstr(AMDGPU::S_MOV_B32)
6484bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6489 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6492 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6493 if (!shouldUseAddr64(AddrData))
6499 Offset = AddrData.Offset;
6505 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6507 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6520 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6531 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6535bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6540 if (STI.useFlatForGlobal())
6543 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6544 if (shouldUseAddr64(AddrData))
6550 Offset = AddrData.Offset;
6556 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6561AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6567 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6573 [=](MachineInstrBuilder &MIB) {
6576 [=](MachineInstrBuilder &MIB) {
6579 [=](MachineInstrBuilder &MIB) {
6582 else if (STI.hasRestrictedSOffset())
6583 MIB.
addReg(AMDGPU::SGPR_NULL);
6587 [=](MachineInstrBuilder &MIB) {
6597AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6602 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6606 [=](MachineInstrBuilder &MIB) {
6609 [=](MachineInstrBuilder &MIB) {
6612 else if (STI.hasRestrictedSOffset())
6613 MIB.
addReg(AMDGPU::SGPR_NULL);
6625AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6630 SOffset = AMDGPU::SGPR_NULL;
6632 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6636static std::optional<uint64_t>
6640 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6641 return std::nullopt;
6642 return Lo_32(*OffsetVal);
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
  std::optional<int64_t> EncodedImm =
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};

AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<int64_t> EncodedImm =
  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};

AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
    return std::nullopt;
  std::optional<int64_t> EncodedOffset =
    return std::nullopt;
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  const auto CheckAbsNeg = [&]() {
    std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);

AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }

AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
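// selectVOP3PMadMixModsImpl folds source modifiers for the mad_mix/fma_mix
// forms: it first collects neg/abs via selectVOP3ModsImpl, then (through the
// CheckAbsNeg lambda) appears to look through an f16-to-f32 extend so the
// operand can be consumed directly as a 16-bit source, with Matched reporting
// whether that extend was found. The two wrappers differ only in whether an
// unmatched operand is rejected or accepted with plain modifiers.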
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
  Register CCReg = I.getOperand(0).getReg();
  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
      .addImm(I.getOperand(2).getImm());
  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
bool AMDGPUInstructionSelector::selectSGetBarrierState(
  const MachineOperand &BarOp = I.getOperand(2);
  std::optional<int64_t> BarValImm =
  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MachineInstrBuilder MIB;
  unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
                           : AMDGPU::S_GET_BARRIER_STATE_M0;
  auto DstReg = I.getOperand(0).getReg();
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
  I.eraseFromParent();
  if (HasInlineConst) {
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_get_named_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
  case Intrinsic::amdgcn_s_barrier_join:
    return AMDGPU::S_BARRIER_JOIN_M0;
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return AMDGPU::S_GET_BARRIER_STATE_M0;
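// getNamedBarrierOp maps an intrinsic ID to either the _IMM or the _M0
// flavour of the named-barrier instruction, depending on whether the barrier
// operand is an inline constant. A condensed sketch, assuming the visible
// cases are representative of the full switches:
//   unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
//     if (HasInlineConst)
//       switch (IntrID) {
//       case Intrinsic::amdgcn_s_barrier_join:
//         return AMDGPU::S_BARRIER_JOIN_IMM;
//       /* ... */
//       }
//     switch (IntrID) {
//     case Intrinsic::amdgcn_s_barrier_join:
//       return AMDGPU::S_BARRIER_JOIN_M0;
//     /* ... */
//     }
//   }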
bool AMDGPUInstructionSelector::selectNamedBarrierInit(
  const MachineOperand &BarOp = I.getOperand(1);
  const MachineOperand &CntOp = I.getOperand(2);
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  constexpr unsigned ShAmt = 16;
  Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
                     ? AMDGPU::S_BARRIER_INIT_M0
                     : AMDGPU::S_BARRIER_SIGNAL_M0;
  MachineInstrBuilder MIB;
  I.eraseFromParent();
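// selectNamedBarrierInit expands to a short SALU sequence over the scratch
// SGPRs above: the member count appears to be shifted into the high half
// (ShAmt == 16) and combined with the barrier id, the packed value is copied
// into M0, and the *_M0 form of S_BARRIER_INIT / S_BARRIER_SIGNAL consumes it.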
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
  std::optional<int64_t> BarValImm =
  Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MachineInstrBuilder MIB;
  if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
    auto DstReg = I.getOperand(0).getReg();
    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
    if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
  auto BarId = ((*BarValImm) >> 4) & 0x3F;
  I.eraseFromParent();
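// For a constant barrier operand the hardware barrier id is recovered as
// bits [9:4] of the immediate, i.e. (*BarValImm >> 4) & 0x3F, and the _IMM
// instruction form is used; otherwise the id is computed into the scratch
// SGPRs and routed through M0, mirroring selectSGetBarrierState above.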
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

  const MachineOperand &Op = MI.getOperand(1);
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());

  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

  const MachineOperand &Op = MI.getOperand(OpIdx);

  assert(OpIdx >= 0 && "expected to match an immediate operand");
void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");

  assert(OpIdx >= 0 && "expected to match an immediate operand");
void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  assert(ExpVal != INT_MIN);

  if (MI.getOperand(OpIdx).getImm())
  MIB.addImm((int64_t)Mods);

  if (MI.getOperand(OpIdx).getImm())
  MIB.addImm((int64_t)Mods);

  unsigned Val = MI.getOperand(OpIdx).getImm();
  MIB.addImm((int64_t)Mods);

  uint32_t V = MI.getOperand(2).getImm();
  if (!Subtarget->hasSafeCUPrefetch())

void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
  unsigned Val = MI.getOperand(OpIdx).getImm();
bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}