29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
45 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), STI(STI),
47#include
"AMDGPUGenGlobalISel.inc"
50#include
"AMDGPUGenGlobalISel.inc"
62 MRI = &
MF.getRegInfo();
70 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
71 ? Def->getOperand(1).getReg()
78 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error));
87 auto &RegClassOrBank = MRI.getRegClassOrRegBank(
Reg);
88 const TargetRegisterClass *RC =
91 const LLT Ty = MRI.getType(
Reg);
95 return MRI.getVRegDef(
Reg)->getOpcode() != AMDGPU::G_TRUNC &&
100 return RB->
getID() == AMDGPU::VCCRegBankID;
103bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
104 unsigned NewOpc)
const {
105 MI.setDesc(TII.get(NewOpc));
109 MachineOperand &Dst =
MI.getOperand(0);
110 MachineOperand &Src =
MI.getOperand(1);
116 const TargetRegisterClass *DstRC
117 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
118 const TargetRegisterClass *SrcRC
119 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
120 if (!DstRC || DstRC != SrcRC)
123 if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
124 !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
126 const MCInstrDesc &MCID =
MI.getDesc();
128 MI.getOperand(0).setIsEarlyClobber(
true);
133bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
136 I.setDesc(TII.get(TargetOpcode::COPY));
138 const MachineOperand &Src =
I.getOperand(1);
139 MachineOperand &Dst =
I.getOperand(0);
143 if (isVCC(DstReg, *MRI)) {
144 if (SrcReg == AMDGPU::SCC) {
145 const TargetRegisterClass *RC
146 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
149 return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
152 if (!isVCC(SrcReg, *MRI)) {
154 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
157 const TargetRegisterClass *SrcRC
158 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
160 std::optional<ValueAndVReg> ConstVal =
164 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
166 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
168 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
175 assert(Subtarget->useRealTrue16Insts());
176 const int64_t NoMods = 0;
177 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
183 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
190 bool IsSGPR = TRI.isSGPRClass(SrcRC);
191 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
198 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
204 if (!MRI->getRegClassOrNull(SrcReg))
205 MRI->setRegClass(SrcReg, SrcRC);
210 const TargetRegisterClass *RC =
211 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
212 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
218 for (
const MachineOperand &MO :
I.operands()) {
219 if (MO.getReg().isPhysical())
222 const TargetRegisterClass *RC =
223 TRI.getConstrainedRegClassForOperand(MO, *MRI);
226 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
231bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
234 Register VCCReg =
I.getOperand(1).getReg();
238 if (STI.hasScalarCompareEq64()) {
240 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
243 Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
244 Cmp =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
251 Register DstReg =
I.getOperand(0).getReg();
255 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
258bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
262 Register DstReg =
I.getOperand(0).getReg();
263 Register SrcReg =
I.getOperand(1).getReg();
264 std::optional<ValueAndVReg> Arg =
268 const int64_t
Value = Arg->Value.getZExtValue();
270 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
277 return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
283 unsigned SelectOpcode =
284 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
294bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
295 Register DstReg =
I.getOperand(0).getReg();
296 Register SrcReg =
I.getOperand(1).getReg();
301 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
309bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
310 const Register DefReg =
I.getOperand(0).getReg();
311 const LLT DefTy = MRI->getType(DefReg);
323 MRI->getRegClassOrRegBank(DefReg);
325 const TargetRegisterClass *DefRC =
334 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
343 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
344 const Register SrcReg =
I.getOperand(i).getReg();
346 const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
348 const LLT SrcTy = MRI->getType(SrcReg);
349 const TargetRegisterClass *SrcRC =
350 TRI.getRegClassForTypeOnBank(SrcTy, *RB);
351 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
356 I.setDesc(TII.get(TargetOpcode::PHI));
357 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
363 unsigned SubIdx)
const {
367 Register DstReg = MRI->createVirtualRegister(&SubRC);
370 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
372 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
398 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
400 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
402 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
408bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
409 Register DstReg =
I.getOperand(0).getReg();
410 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
412 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
413 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
414 DstRB->
getID() != AMDGPU::VCCRegBankID)
417 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
430bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
433 Register DstReg =
I.getOperand(0).getReg();
435 LLT Ty = MRI->getType(DstReg);
440 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
441 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
442 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
446 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
449 .
add(
I.getOperand(1))
450 .
add(
I.getOperand(2))
457 if (STI.hasAddNoCarryInsts()) {
458 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
459 I.setDesc(TII.get(
Opc));
466 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
468 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
472 .
add(
I.getOperand(1))
473 .
add(
I.getOperand(2))
480 assert(!
Sub &&
"illegal sub should not reach here");
482 const TargetRegisterClass &RC
483 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
484 const TargetRegisterClass &HalfRC
485 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
487 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
488 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
489 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
490 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
492 Register DstLo = MRI->createVirtualRegister(&HalfRC);
493 Register DstHi = MRI->createVirtualRegister(&HalfRC);
496 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
499 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
504 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
505 Register CarryReg = MRI->createVirtualRegister(CarryRC);
506 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
511 MachineInstr *Addc =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
521 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
528 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
535bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
540 Register Dst0Reg =
I.getOperand(0).getReg();
541 Register Dst1Reg =
I.getOperand(1).getReg();
542 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
543 I.getOpcode() == AMDGPU::G_UADDE;
544 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
545 I.getOpcode() == AMDGPU::G_USUBE;
547 if (isVCC(Dst1Reg, *MRI)) {
548 unsigned NoCarryOpc =
549 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
550 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
551 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
558 Register Src0Reg =
I.getOperand(2).getReg();
559 Register Src1Reg =
I.getOperand(3).getReg();
562 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
563 .
addReg(
I.getOperand(4).getReg());
566 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
567 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
569 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
570 .
add(
I.getOperand(2))
571 .
add(
I.getOperand(3));
573 if (MRI->use_nodbg_empty(Dst1Reg)) {
574 CarryInst.setOperandDead(3);
576 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
578 if (!MRI->getRegClassOrNull(Dst1Reg))
579 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
582 if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
583 !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
584 !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
588 !RBI.constrainGenericRegister(
I.getOperand(4).getReg(),
589 AMDGPU::SReg_32RegClass, *MRI))
596bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
600 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
601 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() &&
602 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
605 if (Subtarget->hasMADIntraFwdBug())
606 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
607 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
609 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
610 : AMDGPU::V_MAD_NC_I64_I32_e64;
612 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
617 I.setDesc(TII.get(
Opc));
619 I.addImplicitDefUseOperands(*
MF);
620 I.getOperand(0).setIsEarlyClobber(
true);
626bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
628 Register DstReg =
I.getOperand(0).getReg();
629 Register SrcReg =
I.getOperand(1).getReg();
630 LLT DstTy = MRI->getType(DstReg);
631 LLT SrcTy = MRI->getType(SrcReg);
636 unsigned Offset =
I.getOperand(2).getImm();
637 if (
Offset % 32 != 0 || DstSize > 128)
645 const TargetRegisterClass *DstRC =
646 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
647 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
650 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
651 const TargetRegisterClass *SrcRC =
652 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
657 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
662 *SrcRC,
I.getOperand(1));
664 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
665 .
addReg(SrcReg, {}, SubReg);
671bool AMDGPUInstructionSelector::selectS16MergeToS32(
MachineInstr &
MI)
const {
676 LLT Src0Ty = MRI->getType(Src0);
677 LLT Src1Ty = MRI->getType(Src1);
679 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
680 const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, TRI);
681 const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, TRI);
682 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
688 MachineBasicBlock *BB =
MI.getParent();
693 if (Src0Bank->
getID() == AMDGPU::VGPRRegBankID &&
694 Src1Bank->
getID() == AMDGPU::VGPRRegBankID &&
696 BuildMI(*BB,
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), Dst)
702 if (!RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI))
705 MI.eraseFromParent();
710 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
711 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
716 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
722 MI.eraseFromParent();
745 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
746 if (Shift0 && Shift1) {
747 Opc = AMDGPU::S_PACK_HH_B32_B16;
748 MI.getOperand(1).setReg(ShiftSrc0);
749 MI.getOperand(2).setReg(ShiftSrc1);
751 Opc = AMDGPU::S_PACK_LH_B32_B16;
752 MI.getOperand(2).setReg(ShiftSrc1);
756 if (ConstSrc1 && ConstSrc1->Value == 0) {
758 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
763 MI.eraseFromParent();
767 if (STI.hasSPackHL()) {
768 Opc = AMDGPU::S_PACK_HL_B32_B16;
769 MI.getOperand(1).setReg(ShiftSrc0);
773 MI.setDesc(TII.get(
Opc));
778bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
779 MachineBasicBlock *BB =
MI.getParent();
781 LLT DstTy = MRI->getType(DstReg);
782 LLT SrcTy = MRI->getType(
MI.getOperand(1).getReg());
788 MI.getNumOperands() == 3) {
789 return selectS16MergeToS32(
MI);
795 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
797 const TargetRegisterClass *DstRC =
798 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
802 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
803 MachineInstrBuilder MIB =
804 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
805 for (
int I = 0,
E =
MI.getNumOperands() - 1;
I !=
E; ++
I) {
806 MachineOperand &Src =
MI.getOperand(
I + 1);
810 const TargetRegisterClass *SrcRC
811 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
812 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
816 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
819 MI.eraseFromParent();
823bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
824 MachineBasicBlock *BB =
MI.getParent();
825 const int NumDst =
MI.getNumOperands() - 1;
827 MachineOperand &Src =
MI.getOperand(NumDst);
831 LLT DstTy = MRI->getType(DstReg0);
832 LLT SrcTy = MRI->getType(SrcReg);
837 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
839 const TargetRegisterClass *SrcRC =
840 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
841 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
847 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
848 for (
int I = 0,
E = NumDst;
I !=
E; ++
I) {
849 MachineOperand &Dst =
MI.getOperand(
I);
851 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID &&
852 SubRegs[
I] == AMDGPU::hi16) {
853 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())
857 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
858 .
addReg(SrcReg, {}, SubRegs[
I]);
862 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
863 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
866 const TargetRegisterClass *DstRC =
867 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
868 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
872 MI.eraseFromParent();
876bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
877 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
878 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
882 LLT SrcTy = MRI->getType(Src0);
886 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
887 return selectG_MERGE_VALUES(
MI);
894 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
898 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
899 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
902 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
903 DstBank->
getID() == AMDGPU::VGPRRegBankID);
904 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
907 MachineBasicBlock *BB =
MI.getParent();
917 const int64_t K0 = ConstSrc0->Value.getSExtValue();
918 const int64_t K1 = ConstSrc1->Value.getSExtValue();
919 uint32_t Lo16 =
static_cast<uint32_t
>(K0) & 0xffff;
920 uint32_t Hi16 =
static_cast<uint32_t
>(K1) & 0xffff;
921 uint32_t
Imm = Lo16 | (Hi16 << 16);
926 MI.eraseFromParent();
927 return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
932 MI.eraseFromParent();
933 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
944 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
945 MI.setDesc(TII.get(AMDGPU::COPY));
948 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
949 return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
950 RBI.constrainGenericRegister(Src0, RC, *MRI);
953 return selectS16MergeToS32(
MI);
956bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
957 const MachineOperand &MO =
I.getOperand(0);
961 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
962 if ((!RC && !MRI->getRegBankOrNull(MO.
getReg())) ||
963 (RC && RBI.constrainGenericRegister(MO.
getReg(), *RC, *MRI))) {
964 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
971bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
974 Register DstReg =
I.getOperand(0).getReg();
975 Register Src0Reg =
I.getOperand(1).getReg();
976 Register Src1Reg =
I.getOperand(2).getReg();
977 LLT Src1Ty = MRI->getType(Src1Reg);
979 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
982 int64_t
Offset =
I.getOperand(3).getImm();
985 if (
Offset % 32 != 0 || InsSize % 32 != 0)
992 unsigned SubReg = TRI.getSubRegFromChannel(
Offset / 32, InsSize / 32);
993 if (SubReg == AMDGPU::NoSubRegister)
996 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
997 const TargetRegisterClass *DstRC =
998 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
1002 const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
1003 const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
1004 const TargetRegisterClass *Src0RC =
1005 TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
1006 const TargetRegisterClass *Src1RC =
1007 TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
1011 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
1012 if (!Src0RC || !Src1RC)
1015 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
1016 !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
1017 !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
1021 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
1026 I.eraseFromParent();
1030bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
1033 Register OffsetReg =
MI.getOperand(2).getReg();
1034 Register WidthReg =
MI.getOperand(3).getReg();
1036 assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
1037 "scalar BFX instructions are expanded in regbankselect");
1038 assert(MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
1039 "64-bit vector BFX instructions are expanded in regbankselect");
1042 MachineBasicBlock *
MBB =
MI.getParent();
1044 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
1045 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
1050 MI.eraseFromParent();
1055bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
1056 if (STI.getLDSBankCount() != 16)
1062 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
1063 !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
1064 !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
1074 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1076 MachineBasicBlock *
MBB =
MI.getParent();
1080 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1083 .
addImm(
MI.getOperand(3).getImm());
1096 MI.eraseFromParent();
1105bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1107 if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
1110 MachineBasicBlock *
MBB =
MI.getParent();
1114 Register LaneSelect =
MI.getOperand(3).getReg();
1117 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1119 std::optional<ValueAndVReg> ConstSelect =
1125 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1128 std::optional<ValueAndVReg> ConstVal =
1134 STI.hasInv2PiInlineImm())) {
1135 MIB.
addImm(ConstVal->Value.getSExtValue());
1143 RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
1145 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1153 MI.eraseFromParent();
1160bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1164 LLT Ty = MRI->getType(Dst0);
1167 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1169 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1176 MachineBasicBlock *
MBB =
MI.getParent();
1180 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1182 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1195 MI.eraseFromParent();
1200bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1202 switch (IntrinsicID) {
1203 case Intrinsic::amdgcn_if_break: {
1208 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1209 .
add(
I.getOperand(0))
1210 .
add(
I.getOperand(2))
1211 .
add(
I.getOperand(3));
1213 Register DstReg =
I.getOperand(0).getReg();
1214 Register Src0Reg =
I.getOperand(2).getReg();
1215 Register Src1Reg =
I.getOperand(3).getReg();
1217 I.eraseFromParent();
1220 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1224 case Intrinsic::amdgcn_interp_p1_f16:
1225 return selectInterpP1F16(
I);
1226 case Intrinsic::amdgcn_wqm:
1227 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1228 case Intrinsic::amdgcn_softwqm:
1229 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1230 case Intrinsic::amdgcn_strict_wwm:
1231 case Intrinsic::amdgcn_wwm:
1232 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1233 case Intrinsic::amdgcn_strict_wqm:
1234 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1235 case Intrinsic::amdgcn_writelane:
1236 return selectWritelane(
I);
1237 case Intrinsic::amdgcn_div_scale:
1238 return selectDivScale(
I);
1239 case Intrinsic::amdgcn_icmp:
1240 case Intrinsic::amdgcn_fcmp:
1243 return selectIntrinsicCmp(
I);
1244 case Intrinsic::amdgcn_ballot:
1245 return selectBallot(
I);
1246 case Intrinsic::amdgcn_reloc_constant:
1247 return selectRelocConstant(
I);
1248 case Intrinsic::amdgcn_groupstaticsize:
1249 return selectGroupStaticSize(
I);
1250 case Intrinsic::returnaddress:
1251 return selectReturnAddress(
I);
1252 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1253 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1254 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1255 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1256 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1257 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1258 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1259 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1260 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1261 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1262 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1263 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1264 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1265 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1266 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1267 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1268 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1269 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1270 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1271 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1272 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1273 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1274 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1275 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1276 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1277 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1278 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1279 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1280 return selectSMFMACIntrin(
I);
1281 case Intrinsic::amdgcn_permlane16_swap:
1282 case Intrinsic::amdgcn_permlane32_swap:
1283 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1284 case Intrinsic::amdgcn_wave_shuffle:
1285 return selectWaveShuffleIntrin(
I);
1286 case Intrinsic::amdgcn_fma_legacy:
1287 if (!STI.hasFmaLegacy32Insts()) {
1292 case Intrinsic::amdgcn_sudot4:
1293 case Intrinsic::amdgcn_sudot8:
1294 if (!STI.hasDot8Insts()) {
1299 case Intrinsic::amdgcn_permlane16:
1300 case Intrinsic::amdgcn_permlanex16:
1301 if (!STI.hasPermlane16Insts()) {
1306 case Intrinsic::amdgcn_mov_dpp8:
1307 if (!STI.hasDPP8()) {
1312 case Intrinsic::amdgcn_tanh:
1313 if (!STI.hasTanhInsts()) {
1328 if (
Size == 16 && !ST.has16BitInsts())
1331 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1332 unsigned FakeS16Opc,
unsigned S32Opc,
1335 return ST.hasTrue16BitInsts()
1336 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1347 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1348 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1349 AMDGPU::V_CMP_NE_U64_e64);
1351 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1352 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1353 AMDGPU::V_CMP_EQ_U64_e64);
1355 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1356 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1357 AMDGPU::V_CMP_GT_I64_e64);
1359 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1360 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1361 AMDGPU::V_CMP_GE_I64_e64);
1363 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1364 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1365 AMDGPU::V_CMP_LT_I64_e64);
1367 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1368 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1369 AMDGPU::V_CMP_LE_I64_e64);
1371 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1372 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1373 AMDGPU::V_CMP_GT_U64_e64);
1375 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1376 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1377 AMDGPU::V_CMP_GE_U64_e64);
1379 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1380 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1381 AMDGPU::V_CMP_LT_U64_e64);
1383 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1384 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1385 AMDGPU::V_CMP_LE_U64_e64);
1388 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1389 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1390 AMDGPU::V_CMP_EQ_F64_e64);
1392 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1393 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1394 AMDGPU::V_CMP_GT_F64_e64);
1396 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1397 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1398 AMDGPU::V_CMP_GE_F64_e64);
1400 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1401 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1402 AMDGPU::V_CMP_LT_F64_e64);
1404 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1405 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1406 AMDGPU::V_CMP_LE_F64_e64);
1408 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1409 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1410 AMDGPU::V_CMP_NEQ_F64_e64);
1412 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1413 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1414 AMDGPU::V_CMP_O_F64_e64);
1416 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1417 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1418 AMDGPU::V_CMP_U_F64_e64);
1420 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1421 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1422 AMDGPU::V_CMP_NLG_F64_e64);
1424 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1425 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1426 AMDGPU::V_CMP_NLE_F64_e64);
1428 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1429 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1430 AMDGPU::V_CMP_NLT_F64_e64);
1432 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1433 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1434 AMDGPU::V_CMP_NGE_F64_e64);
1436 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1437 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1438 AMDGPU::V_CMP_NGT_F64_e64);
1440 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1441 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1442 AMDGPU::V_CMP_NEQ_F64_e64);
1444 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1445 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1446 AMDGPU::V_CMP_TRU_F64_e64);
1448 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1449 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1450 AMDGPU::V_CMP_F_F64_e64);
1455 unsigned Size)
const {
1457 if (!STI.hasScalarCompareEq64())
1462 return AMDGPU::S_CMP_LG_U64;
1464 return AMDGPU::S_CMP_EQ_U64;
1473 return AMDGPU::S_CMP_LG_U32;
1475 return AMDGPU::S_CMP_EQ_U32;
1477 return AMDGPU::S_CMP_GT_I32;
1479 return AMDGPU::S_CMP_GE_I32;
1481 return AMDGPU::S_CMP_LT_I32;
1483 return AMDGPU::S_CMP_LE_I32;
1485 return AMDGPU::S_CMP_GT_U32;
1487 return AMDGPU::S_CMP_GE_U32;
1489 return AMDGPU::S_CMP_LT_U32;
1491 return AMDGPU::S_CMP_LE_U32;
1493 return AMDGPU::S_CMP_EQ_F32;
1495 return AMDGPU::S_CMP_GT_F32;
1497 return AMDGPU::S_CMP_GE_F32;
1499 return AMDGPU::S_CMP_LT_F32;
1501 return AMDGPU::S_CMP_LE_F32;
1503 return AMDGPU::S_CMP_LG_F32;
1505 return AMDGPU::S_CMP_O_F32;
1507 return AMDGPU::S_CMP_U_F32;
1509 return AMDGPU::S_CMP_NLG_F32;
1511 return AMDGPU::S_CMP_NLE_F32;
1513 return AMDGPU::S_CMP_NLT_F32;
1515 return AMDGPU::S_CMP_NGE_F32;
1517 return AMDGPU::S_CMP_NGT_F32;
1519 return AMDGPU::S_CMP_NEQ_F32;
1526 if (!STI.hasSALUFloatInsts())
1531 return AMDGPU::S_CMP_EQ_F16;
1533 return AMDGPU::S_CMP_GT_F16;
1535 return AMDGPU::S_CMP_GE_F16;
1537 return AMDGPU::S_CMP_LT_F16;
1539 return AMDGPU::S_CMP_LE_F16;
1541 return AMDGPU::S_CMP_LG_F16;
1543 return AMDGPU::S_CMP_O_F16;
1545 return AMDGPU::S_CMP_U_F16;
1547 return AMDGPU::S_CMP_NLG_F16;
1549 return AMDGPU::S_CMP_NLE_F16;
1551 return AMDGPU::S_CMP_NLT_F16;
1553 return AMDGPU::S_CMP_NGE_F16;
1555 return AMDGPU::S_CMP_NGT_F16;
1557 return AMDGPU::S_CMP_NEQ_F16;
1566bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1571 Register SrcReg =
I.getOperand(2).getReg();
1572 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1576 Register CCReg =
I.getOperand(0).getReg();
1577 if (!isVCC(CCReg, *MRI)) {
1578 int Opcode = getS_CMPOpcode(Pred,
Size);
1581 MachineInstr *ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode))
1582 .
add(
I.getOperand(2))
1583 .
add(
I.getOperand(3));
1584 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1588 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
1589 I.eraseFromParent();
1593 if (
I.getOpcode() == AMDGPU::G_FCMP)
1600 MachineInstrBuilder ICmp;
1603 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1605 .
add(
I.getOperand(2))
1607 .
add(
I.getOperand(3))
1610 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1611 .
add(
I.getOperand(2))
1612 .
add(
I.getOperand(3));
1616 *TRI.getBoolRC(), *MRI);
1618 I.eraseFromParent();
1622bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1623 Register Dst =
I.getOperand(0).getReg();
1624 if (isVCC(Dst, *MRI))
1627 LLT DstTy = MRI->getType(Dst);
1633 Register SrcReg =
I.getOperand(2).getReg();
1634 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1642 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1643 I.eraseFromParent();
1644 return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1651 MachineInstrBuilder SelectedMI;
1652 MachineOperand &
LHS =
I.getOperand(2);
1653 MachineOperand &
RHS =
I.getOperand(3);
1654 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1655 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1657 copyToVGPRIfSrcFolded(Src0, Src0Mods,
LHS, &
I,
true);
1659 copyToVGPRIfSrcFolded(Src1, Src1Mods,
RHS, &
I,
true);
1660 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1662 SelectedMI.
addImm(Src0Mods);
1663 SelectedMI.
addReg(Src0Reg);
1665 SelectedMI.
addImm(Src1Mods);
1666 SelectedMI.
addReg(Src1Reg);
1672 RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1675 I.eraseFromParent();
1686 if (
MI->getParent() !=
MBB)
1690 if (
MI->getOpcode() == AMDGPU::COPY) {
1693 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1694 SrcRB->getID() == AMDGPU::SGPRRegBankID)
1699 if (
MI->getOpcode() == AMDGPU::G_AMDGPU_COPY_VCC_SCC)
1715bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1718 Register DstReg =
I.getOperand(0).getReg();
1719 Register SrcReg =
I.getOperand(2).getReg();
1720 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1721 const unsigned WaveSize = STI.getWavefrontSize();
1725 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1728 std::optional<ValueAndVReg> Arg =
1733 if (BallotSize != WaveSize) {
1734 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1738 const int64_t
Value = Arg->Value.getZExtValue();
1741 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1748 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1754 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1758 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1768 if (BallotSize != WaveSize) {
1769 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1771 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1778 I.eraseFromParent();
1782bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1783 Register DstReg =
I.getOperand(0).getReg();
1784 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
1785 const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1786 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1789 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1791 Module *
M =
MF->getFunction().getParent();
1792 const MDNode *
Metadata =
I.getOperand(2).getMetadata();
1799 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1802 I.eraseFromParent();
1806bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1809 Register DstReg =
I.getOperand(0).getReg();
1810 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1811 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1812 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1820 const SIMachineFunctionInfo *MFI =
MF->getInfo<SIMachineFunctionInfo>();
1823 Module *
M =
MF->getFunction().getParent();
1824 const GlobalValue *GV =
1829 I.eraseFromParent();
1834bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1839 MachineOperand &Dst =
I.getOperand(0);
1841 unsigned Depth =
I.getOperand(2).getImm();
1843 const TargetRegisterClass *RC
1844 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1846 !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1851 MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1854 I.eraseFromParent();
1858 MachineFrameInfo &MFI =
MF.getFrameInfo();
1863 Register ReturnAddrReg = TRI.getReturnAddressReg(
MF);
1865 AMDGPU::SReg_64RegClass,
DL);
1868 I.eraseFromParent();
1872bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1875 MachineBasicBlock *BB =
MI.getParent();
1876 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1877 .
add(
MI.getOperand(1));
1880 MI.eraseFromParent();
1882 if (!MRI->getRegClassOrNull(
Reg))
1883 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1887bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1889 MachineBasicBlock *
MBB =
MI.getParent();
1893 unsigned IndexOperand =
MI.getOperand(7).getImm();
1894 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1895 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1897 if (WaveDone && !WaveRelease) {
1901 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1904 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1905 IndexOperand &= ~0x3f;
1906 unsigned CountDw = 0;
1909 CountDw = (IndexOperand >> 24) & 0xf;
1910 IndexOperand &= ~(0xf << 24);
1912 if (CountDw < 1 || CountDw > 4) {
1915 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1923 Fn,
"ds_ordered_count: bad index operand",
DL));
1926 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1929 unsigned Offset0 = OrderedCountIndex << 2;
1930 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1933 Offset1 |= (CountDw - 1) << 6;
1936 Offset1 |= ShaderType << 2;
1938 unsigned Offset = Offset0 | (Offset1 << 8);
1946 MachineInstrBuilder
DS =
1947 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1952 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1956 MI.eraseFromParent();
1962 case Intrinsic::amdgcn_ds_gws_init:
1963 return AMDGPU::DS_GWS_INIT;
1964 case Intrinsic::amdgcn_ds_gws_barrier:
1965 return AMDGPU::DS_GWS_BARRIER;
1966 case Intrinsic::amdgcn_ds_gws_sema_v:
1967 return AMDGPU::DS_GWS_SEMA_V;
1968 case Intrinsic::amdgcn_ds_gws_sema_br:
1969 return AMDGPU::DS_GWS_SEMA_BR;
1970 case Intrinsic::amdgcn_ds_gws_sema_p:
1971 return AMDGPU::DS_GWS_SEMA_P;
1972 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1973 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
1979bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1981 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1982 !STI.hasGWSSemaReleaseAll()))
1986 const bool HasVSrc =
MI.getNumOperands() == 3;
1987 assert(HasVSrc ||
MI.getNumOperands() == 2);
1989 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1990 const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
1991 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1997 MachineBasicBlock *
MBB =
MI.getParent();
2000 MachineInstr *Readfirstlane =
nullptr;
2005 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
2006 Readfirstlane = OffsetDef;
2011 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
2021 std::tie(BaseOffset, ImmOffset) =
2024 if (Readfirstlane) {
2027 if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
2033 if (!RBI.constrainGenericRegister(BaseOffset,
2034 AMDGPU::SReg_32RegClass, *MRI))
2038 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2053 const MCInstrDesc &InstrDesc = TII.get(
Opc);
2058 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
2059 const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
2060 const TargetRegisterClass *SubRC =
2061 TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
2065 if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
2075 Register DataReg = MRI->createVirtualRegister(DataRC);
2076 if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
2079 Register UndefReg = MRI->createVirtualRegister(SubRC);
2098 MI.eraseFromParent();
2102bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
2103 bool IsAppend)
const {
2104 Register PtrBase =
MI.getOperand(2).getReg();
2105 LLT PtrTy = MRI->getType(PtrBase);
2109 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
2112 if (!isDSOffsetLegal(PtrBase,
Offset)) {
2113 PtrBase =
MI.getOperand(2).getReg();
2117 MachineBasicBlock *
MBB =
MI.getParent();
2119 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2123 if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
2130 MI.eraseFromParent();
2135bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
2136 MachineFunction *
MF =
MI.getMF();
2137 SIMachineFunctionInfo *MFInfo =
MF->getInfo<SIMachineFunctionInfo>();
2148 TFE = TexFailCtrl & 0x1;
2150 LWE = TexFailCtrl & 0x2;
2153 return TexFailCtrl == 0;
2156bool AMDGPUInstructionSelector::selectImageIntrinsic(
2158 MachineBasicBlock *
MBB =
MI.getParent();
2164 Register ResultDef =
MI.getOperand(0).getReg();
2165 if (MRI->use_nodbg_empty(ResultDef))
2169 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2178 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2180 Register VDataIn = AMDGPU::NoRegister;
2181 Register VDataOut = AMDGPU::NoRegister;
2183 int NumVDataDwords = -1;
2184 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2185 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2191 Unorm =
MI.getOperand(ArgOffset + Intr->
UnormIndex).getImm() != 0;
2195 bool IsTexFail =
false;
2197 TFE, LWE, IsTexFail))
2200 const int Flags =
MI.getOperand(ArgOffset + Intr->
NumArgs).getImm();
2201 const bool IsA16 = (
Flags & 1) != 0;
2202 const bool IsG16 = (
Flags & 2) != 0;
2205 if (IsA16 && !STI.hasG16() && !IsG16)
2209 unsigned DMaskLanes = 0;
2211 if (BaseOpcode->
Atomic) {
2213 VDataOut =
MI.getOperand(0).getReg();
2214 VDataIn =
MI.getOperand(2).getReg();
2215 LLT Ty = MRI->getType(VDataIn);
2218 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2223 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2225 DMask = Is64Bit ? 0xf : 0x3;
2226 NumVDataDwords = Is64Bit ? 4 : 2;
2228 DMask = Is64Bit ? 0x3 : 0x1;
2229 NumVDataDwords = Is64Bit ? 2 : 1;
2232 DMask =
MI.getOperand(ArgOffset + Intr->
DMaskIndex).getImm();
2235 if (BaseOpcode->
Store) {
2236 VDataIn =
MI.getOperand(1).getReg();
2237 VDataTy = MRI->getType(VDataIn);
2242 VDataOut =
MI.getOperand(0).getReg();
2243 VDataTy = MRI->getType(VDataOut);
2244 NumVDataDwords = DMaskLanes;
2246 if (IsD16 && !STI.hasUnpackedD16VMem())
2247 NumVDataDwords = (DMaskLanes + 1) / 2;
2252 if (Subtarget->hasG16() && IsG16) {
2253 const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
2256 IntrOpcode = G16MappingInfo->
G16;
2260 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2270 int NumVAddrRegs = 0;
2271 int NumVAddrDwords = 0;
2274 MachineOperand &AddrOp =
MI.getOperand(ArgOffset +
I);
2275 if (!AddrOp.
isReg())
2283 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2290 NumVAddrRegs != 1 &&
2291 (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
2292 : NumVAddrDwords == NumVAddrRegs);
2293 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2304 NumVDataDwords, NumVAddrDwords);
2305 }
else if (IsGFX12Plus) {
2307 NumVDataDwords, NumVAddrDwords);
2308 }
else if (IsGFX11Plus) {
2310 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2311 : AMDGPU::MIMGEncGfx11Default,
2312 NumVDataDwords, NumVAddrDwords);
2313 }
else if (IsGFX10Plus) {
2315 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2316 : AMDGPU::MIMGEncGfx10Default,
2317 NumVDataDwords, NumVAddrDwords);
2319 if (Subtarget->hasGFX90AInsts()) {
2321 NumVDataDwords, NumVAddrDwords);
2325 <<
"requested image instruction is not supported on this GPU\n");
2332 NumVDataDwords, NumVAddrDwords);
2335 NumVDataDwords, NumVAddrDwords);
2345 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2347 Register TmpReg = MRI->createVirtualRegister(
2348 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2349 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2352 if (!MRI->use_empty(VDataOut)) {
2365 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2366 MachineOperand &SrcOp =
MI.getOperand(ArgOffset + Intr->
VAddrStart +
I);
2367 if (SrcOp.
isReg()) {
2386 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2388 MIB.
addImm(IsA16 ? -1 : 0);
2390 if (!Subtarget->hasGFX90AInsts()) {
2402 MIB.
addImm(IsD16 ? -1 : 0);
2404 MI.eraseFromParent();
2406 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
2412bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2418 MachineBasicBlock *
MBB =
MI.getParent();
2423 unsigned Offset =
MI.getOperand(6).getImm();
2427 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2428 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2429 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2431 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2432 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2434 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2435 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2447 MI.eraseFromParent();
2452bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2455 switch (IntrinsicID) {
2456 case Intrinsic::amdgcn_end_cf:
2457 return selectEndCfIntrinsic(
I);
2458 case Intrinsic::amdgcn_ds_ordered_add:
2459 case Intrinsic::amdgcn_ds_ordered_swap:
2460 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2461 case Intrinsic::amdgcn_ds_gws_init:
2462 case Intrinsic::amdgcn_ds_gws_barrier:
2463 case Intrinsic::amdgcn_ds_gws_sema_v:
2464 case Intrinsic::amdgcn_ds_gws_sema_br:
2465 case Intrinsic::amdgcn_ds_gws_sema_p:
2466 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2467 return selectDSGWSIntrinsic(
I, IntrinsicID);
2468 case Intrinsic::amdgcn_ds_append:
2469 return selectDSAppendConsume(
I,
true);
2470 case Intrinsic::amdgcn_ds_consume:
2471 return selectDSAppendConsume(
I,
false);
2472 case Intrinsic::amdgcn_init_whole_wave:
2473 return selectInitWholeWave(
I);
2474 case Intrinsic::amdgcn_raw_buffer_load_lds:
2475 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
2476 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2477 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
2478 case Intrinsic::amdgcn_struct_buffer_load_lds:
2479 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
2480 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2481 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
2482 return selectBufferLoadLds(
I);
2487 case Intrinsic::amdgcn_load_to_lds:
2488 case Intrinsic::amdgcn_load_async_to_lds:
2489 case Intrinsic::amdgcn_global_load_lds:
2490 case Intrinsic::amdgcn_global_load_async_lds:
2491 return selectGlobalLoadLds(
I);
2492 case Intrinsic::amdgcn_tensor_load_to_lds:
2493 case Intrinsic::amdgcn_tensor_store_from_lds:
2494 return selectTensorLoadStore(
I, IntrinsicID);
2495 case Intrinsic::amdgcn_asyncmark:
2496 case Intrinsic::amdgcn_wait_asyncmark:
2497 if (!Subtarget->hasAsyncMark())
2500 case Intrinsic::amdgcn_exp_compr:
2501 if (!STI.hasCompressedExport()) {
2506 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2507 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2508 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2509 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2510 return selectDSBvhStackIntrinsic(
I);
2511 case Intrinsic::amdgcn_s_alloc_vgpr: {
2517 Register ResReg =
I.getOperand(0).getReg();
2519 MachineInstr *AllocMI =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_ALLOC_VGPR))
2520 .
add(
I.getOperand(2));
2523 I.eraseFromParent();
2525 return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
2527 case Intrinsic::amdgcn_s_barrier_init:
2528 case Intrinsic::amdgcn_s_barrier_signal_var:
2529 return selectNamedBarrierInit(
I, IntrinsicID);
2530 case Intrinsic::amdgcn_s_wakeup_barrier: {
2531 if (!STI.hasSWakeupBarrier()) {
2535 return selectNamedBarrierInst(
I, IntrinsicID);
2537 case Intrinsic::amdgcn_s_barrier_join:
2538 case Intrinsic::amdgcn_s_get_named_barrier_state:
2539 return selectNamedBarrierInst(
I, IntrinsicID);
2540 case Intrinsic::amdgcn_s_get_barrier_state:
2541 return selectSGetBarrierState(
I, IntrinsicID);
2542 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2543 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
2548bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2555 Register DstReg =
I.getOperand(0).getReg();
2556 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
2558 const MachineOperand &CCOp =
I.getOperand(1);
2560 if (!isVCC(CCReg, *MRI)) {
2561 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2562 AMDGPU::S_CSELECT_B32;
2563 MachineInstr *CopySCC =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
2569 if (!MRI->getRegClassOrNull(CCReg))
2570 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
2572 .
add(
I.getOperand(2))
2573 .
add(
I.getOperand(3));
2577 I.eraseFromParent();
2586 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2588 .
add(
I.getOperand(3))
2590 .
add(
I.getOperand(2))
2591 .
add(
I.getOperand(1));
2594 I.eraseFromParent();
2598bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2599 Register DstReg =
I.getOperand(0).getReg();
2600 Register SrcReg =
I.getOperand(1).getReg();
2601 const LLT DstTy = MRI->getType(DstReg);
2602 const LLT SrcTy = MRI->getType(SrcReg);
2605 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2606 const RegisterBank *DstRB;
2612 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2617 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2622 const TargetRegisterClass *SrcRC =
2623 TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
2624 const TargetRegisterClass *DstRC =
2625 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
2626 if (!SrcRC || !DstRC)
2629 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
2630 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
2635 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2636 assert(STI.useRealTrue16Insts());
2640 .
addReg(SrcReg, {}, AMDGPU::lo16);
2641 I.eraseFromParent();
2649 Register LoReg = MRI->createVirtualRegister(DstRC);
2650 Register HiReg = MRI->createVirtualRegister(DstRC);
2652 .
addReg(SrcReg, {}, AMDGPU::sub0);
2654 .
addReg(SrcReg, {}, AMDGPU::sub1);
2656 if (IsVALU && STI.hasSDWA()) {
2659 MachineInstr *MovSDWA =
2660 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2670 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2671 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2672 Register ImmReg = MRI->createVirtualRegister(DstRC);
2674 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2684 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2685 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2686 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2698 And.setOperandDead(3);
2699 Or.setOperandDead(3);
2703 I.eraseFromParent();
2711 unsigned SubRegIdx = DstSize < 32
2712 ?
static_cast<unsigned>(AMDGPU::sub0)
2713 : TRI.getSubRegFromChannel(0, DstSize / 32);
2714 if (SubRegIdx == AMDGPU::NoSubRegister)
2719 const TargetRegisterClass *SrcWithSubRC
2720 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2724 if (SrcWithSubRC != SrcRC) {
2725 if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
2729 I.getOperand(1).setSubReg(SubRegIdx);
2732 I.setDesc(TII.get(TargetOpcode::COPY));
2739 int SignedMask =
static_cast<int>(Mask);
2740 return SignedMask >= -16 && SignedMask <= 64;
2744const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2753 return &RBI.getRegBankFromRegClass(*RC, LLT());
2757bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2758 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2759 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2762 const Register DstReg =
I.getOperand(0).getReg();
2763 const Register SrcReg =
I.getOperand(1).getReg();
2765 const LLT DstTy = MRI->getType(DstReg);
2766 const LLT SrcTy = MRI->getType(SrcReg);
2767 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2774 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2777 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2779 return selectCOPY(
I);
2781 const TargetRegisterClass *SrcRC =
2782 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2783 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2784 const TargetRegisterClass *DstRC =
2785 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2787 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2788 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2794 I.eraseFromParent();
2796 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2797 RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2800 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2806 MachineInstr *ExtI =
2810 I.eraseFromParent();
2815 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2816 MachineInstr *ExtI =
2821 I.eraseFromParent();
2826 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2827 const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
2828 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2829 if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
2832 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2833 const unsigned SextOpc = SrcSize == 8 ?
2834 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2837 I.eraseFromParent();
2838 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2843 if (DstSize > 32 && SrcSize == 32) {
2844 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2845 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2848 .
addReg(SrcReg, {}, SubReg)
2856 .
addReg(SrcReg, {}, SubReg)
2857 .addImm(AMDGPU::sub0)
2860 I.eraseFromParent();
2861 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2865 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2866 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2869 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2871 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2872 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2873 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2875 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2877 .
addReg(SrcReg, {}, SubReg)
2878 .addImm(AMDGPU::sub0)
2886 I.eraseFromParent();
2887 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
2902 I.eraseFromParent();
2903 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2927 if (Unmerge->getNumDefs() == 2 && Unmerge->getOperand(1).getReg() == In &&
2929 Out = Unmerge->getSourceReg();
2949 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2956 assert(Mask.size() == 2);
2958 if (Mask[0] == 1 && Mask[1] <= 1) {
2966bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2967 if (!Subtarget->hasSALUFloatInsts())
2970 Register Dst =
I.getOperand(0).getReg();
2971 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2972 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2975 Register Src =
I.getOperand(1).getReg();
2981 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2983 I.eraseFromParent();
2984 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
2991bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
3004 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3005 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3010 MachineInstr *Fabs =
getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
3014 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3015 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3018 MachineBasicBlock *BB =
MI.getParent();
3020 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3021 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3022 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3023 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3025 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3026 .
addReg(Src, {}, AMDGPU::sub0);
3027 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3028 .
addReg(Src, {}, AMDGPU::sub1);
3029 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3033 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
3038 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3043 MI.eraseFromParent();
3048bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
3050 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3051 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3056 MachineBasicBlock *BB =
MI.getParent();
3058 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3059 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3060 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3061 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3063 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3064 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3067 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3068 .
addReg(Src, {}, AMDGPU::sub0);
3069 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3070 .
addReg(Src, {}, AMDGPU::sub1);
3071 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3076 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
3080 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3086 MI.eraseFromParent();
3091 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
3094void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
3097 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
3098 const MachineInstr *PtrMI =
3099 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
3103 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
3108 for (
unsigned i = 1; i != 3; ++i) {
3109 const MachineOperand &GEPOp = PtrMI->
getOperand(i);
3110 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.
getReg());
3115 assert(GEPInfo.Imm == 0);
3119 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.
getReg(), MRI, TRI);
3120 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
3121 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
3123 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
3127 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
3130bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
3131 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
3134bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3135 if (!
MI.hasOneMemOperand())
3138 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3151 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3152 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3153 AMDGPU::SGPRRegBankID;
3156 return I &&
I->getMetadata(
"amdgpu.uniform");
3160 for (
const GEPInfo &GEPInfo : AddrInfo) {
3161 if (!GEPInfo.VgprParts.empty())
3167void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3168 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3171 STI.ldsRequiresM0Init()) {
3175 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3180bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3187 if (
Reg.isPhysical())
3191 const unsigned Opcode =
MI.getOpcode();
3193 if (Opcode == AMDGPU::COPY)
3196 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3197 Opcode == AMDGPU::G_XOR)
3202 return GI->is(Intrinsic::amdgcn_class);
3204 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
3207bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3209 MachineOperand &CondOp =
I.getOperand(0);
3215 const TargetRegisterClass *ConstrainRC;
3222 if (!isVCC(CondReg, *MRI)) {
3226 CondPhysReg = AMDGPU::SCC;
3227 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3228 ConstrainRC = &AMDGPU::SReg_32RegClass;
3235 const bool Is64 = STI.isWave64();
3236 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3237 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3239 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3240 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3247 CondPhysReg = TRI.getVCC();
3248 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3249 ConstrainRC = TRI.getBoolRC();
3252 if (!MRI->getRegClassOrNull(CondReg))
3253 MRI->setRegClass(CondReg, ConstrainRC);
3255 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3258 .
addMBB(
I.getOperand(1).getMBB());
3260 I.eraseFromParent();
3264bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3266 Register DstReg =
I.getOperand(0).getReg();
3267 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3268 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3269 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3273 return RBI.constrainGenericRegister(
3274 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
3277bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3278 Register DstReg =
I.getOperand(0).getReg();
3279 Register SrcReg =
I.getOperand(1).getReg();
3280 Register MaskReg =
I.getOperand(2).getReg();
3281 LLT Ty = MRI->getType(DstReg);
3282 LLT MaskTy = MRI->getType(MaskReg);
3286 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3287 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3288 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3289 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3295 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3299 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3300 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3303 !CanCopyLow32 && !CanCopyHi32) {
3304 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3308 I.eraseFromParent();
3313 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3314 const TargetRegisterClass &RegRC
3315 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3317 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3318 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3319 const TargetRegisterClass *MaskRC =
3320 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3322 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3323 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3324 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3329 "ptrmask should have been narrowed during legalize");
3331 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3337 I.eraseFromParent();
3341 Register HiReg = MRI->createVirtualRegister(&RegRC);
3342 Register LoReg = MRI->createVirtualRegister(&RegRC);
3345 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3346 .
addReg(SrcReg, {}, AMDGPU::sub0);
3347 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3348 .
addReg(SrcReg, {}, AMDGPU::sub1);
3357 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3358 MaskedLo = MRI->createVirtualRegister(&RegRC);
3360 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3361 .
addReg(MaskReg, {}, AMDGPU::sub0);
3362 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3371 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3372 MaskedHi = MRI->createVirtualRegister(&RegRC);
3374 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3375 .
addReg(MaskReg, {}, AMDGPU::sub1);
3376 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3381 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3386 I.eraseFromParent();
3392static std::pair<Register, unsigned>
3399 std::tie(IdxBaseReg,
Offset) =
3401 if (IdxBaseReg == AMDGPU::NoRegister) {
3405 IdxBaseReg = IdxReg;
3412 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3413 return std::pair(IdxReg, SubRegs[0]);
3414 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3417bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3423 LLT DstTy = MRI->getType(DstReg);
3424 LLT SrcTy = MRI->getType(SrcReg);
3426 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3427 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3428 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3432 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3435 const TargetRegisterClass *SrcRC =
3436 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3437 const TargetRegisterClass *DstRC =
3438 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3439 if (!SrcRC || !DstRC)
3441 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3442 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3443 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3446 MachineBasicBlock *BB =
MI.getParent();
3454 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3458 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3461 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3463 .
addReg(SrcReg, {}, SubReg)
3465 MI.eraseFromParent();
3472 if (!STI.useVGPRIndexMode()) {
3473 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3475 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3476 .
addReg(SrcReg, {}, SubReg)
3478 MI.eraseFromParent();
3482 const MCInstrDesc &GPRIDXDesc =
3483 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3489 MI.eraseFromParent();
3494bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3501 LLT VecTy = MRI->getType(DstReg);
3502 LLT ValTy = MRI->getType(ValReg);
3506 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3507 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3508 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3514 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3517 const TargetRegisterClass *VecRC =
3518 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3519 const TargetRegisterClass *ValRC =
3520 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3522 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3523 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3524 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3525 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3528 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3532 std::tie(IdxReg, SubReg) =
3535 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3536 STI.useVGPRIndexMode();
3538 MachineBasicBlock *BB =
MI.getParent();
3542 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3545 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3546 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3551 MI.eraseFromParent();
3555 const MCInstrDesc &GPRIDXDesc =
3556 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3563 MI.eraseFromParent();
3569 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3570 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3571 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3572 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3573 case Intrinsic::amdgcn_load_async_to_lds:
3574 case Intrinsic::amdgcn_global_load_async_lds:
3580bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3581 if (!Subtarget->hasVMemToLDSLoad())
3584 unsigned Size =
MI.getOperand(3).getImm();
3588 const bool HasVIndex =
MI.getNumOperands() == 9;
3592 VIndex =
MI.getOperand(4).getReg();
3596 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3597 std::optional<ValueAndVReg> MaybeVOffset =
3599 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3605 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3606 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3607 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3608 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3611 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3612 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3613 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3614 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3617 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3618 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3619 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3620 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3623 if (!Subtarget->hasLDSLoadB96_B128())
3626 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3627 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3628 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3629 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3632 if (!Subtarget->hasLDSLoadB96_B128())
3635 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3636 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3637 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3638 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3642 MachineBasicBlock *
MBB =
MI.getParent();
3645 .
add(
MI.getOperand(2));
3649 if (HasVIndex && HasVOffset) {
3650 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3651 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3658 }
else if (HasVIndex) {
3660 }
else if (HasVOffset) {
3664 MIB.
add(
MI.getOperand(1));
3665 MIB.
add(
MI.getOperand(5 + OpOffset));
3666 MIB.
add(
MI.getOperand(6 + OpOffset));
3668 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3677 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3682 MachinePointerInfo StorePtrI = LoadPtrI;
3693 MachineMemOperand *StoreMMO =
3699 MI.eraseFromParent();
3712 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3718 return Def->getOperand(1).getReg();
3732 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3740 return Def->getOperand(1).getReg();
3742 if (
VT->signBitIsZero(
Reg))
3743 return matchZeroExtendFromS32(
Reg);
3751AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3753 : matchZeroExtendFromS32(
Reg);
3759AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3761 : matchSignExtendFromS32(
Reg);
3765AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3766 bool IsSigned)
const {
3768 return matchSignExtendFromS32OrS32(
Reg);
3770 return matchZeroExtendFromS32OrS32(
Reg);
3780 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3787 return Def->getOperand(1).getReg();
3792bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3793 if (!Subtarget->hasVMemToLDSLoad())
3797 unsigned Size =
MI.getOperand(3).getImm();
3804 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3807 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3810 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3813 if (!Subtarget->hasLDSLoadB96_B128())
3815 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3818 if (!Subtarget->hasLDSLoadB96_B128())
3820 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3824 MachineBasicBlock *
MBB =
MI.getParent();
3827 .
add(
MI.getOperand(2));
3833 if (!isSGPR(Addr)) {
3835 if (isSGPR(AddrDef->Reg)) {
3836 Addr = AddrDef->Reg;
3837 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3840 if (isSGPR(SAddr)) {
3841 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3842 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3853 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3865 MIB.
add(
MI.getOperand(4));
3867 unsigned Aux =
MI.getOperand(5).getImm();
3871 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3873 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3874 MachinePointerInfo StorePtrI = LoadPtrI;
3883 MachineMemOperand *StoreMMO =
3885 sizeof(int32_t),
Align(4));
3889 MI.eraseFromParent();
3894bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3896 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3898 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3902 const auto isAllZeros = [&](MachineOperand &Opnd) {
3903 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3912 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3913 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3918 MachineBasicBlock *
MBB =
MI.getParent();
3920 .
add(
MI.getOperand(1))
3921 .
add(
MI.getOperand(2));
3923 if (NumGroups >= 4) {
3924 MIB.
add(
MI.getOperand(3))
3925 .
add(
MI.getOperand(4));
3929 .
add(
MI.getOperand(6));
3931 MI.eraseFromParent();
3935bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3937 unsigned OpcodeOpIdx =
3938 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3939 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3940 MI.removeOperand(OpcodeOpIdx);
3941 MI.addImplicitDefUseOperands(*
MI.getMF());
3948bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3951 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3952 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3954 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3955 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3957 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3958 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3960 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3961 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3963 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3964 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3966 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3967 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3969 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3970 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3972 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3973 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3975 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3976 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3978 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3979 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3981 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3982 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3984 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3985 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3987 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3988 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3990 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3991 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3993 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3994 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3996 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3997 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3999 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
4000 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
4002 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
4003 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
4005 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
4006 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
4008 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
4009 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
4011 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
4012 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
4014 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
4015 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
4017 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
4018 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
4020 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
4021 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
4023 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
4024 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
4026 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
4027 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
4029 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
4030 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
4032 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
4033 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
4039 auto VDst_In =
MI.getOperand(4);
4041 MI.setDesc(TII.get(
Opc));
4042 MI.removeOperand(4);
4043 MI.removeOperand(1);
4044 MI.addOperand(VDst_In);
4045 MI.addImplicitDefUseOperands(*
MI.getMF());
4046 const MCInstrDesc &MCID =
MI.getDesc();
4048 MI.getOperand(0).setIsEarlyClobber(
true);
4053bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
4055 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
4056 !Subtarget->hasPermlane16Swap())
4058 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
4059 !Subtarget->hasPermlane32Swap())
4062 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
4063 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
4064 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
4066 MI.removeOperand(2);
4067 MI.setDesc(TII.get(Opcode));
4070 MachineOperand &FI =
MI.getOperand(4);
4077bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
4080 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4081 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4082 MachineBasicBlock *
MBB =
MI.getParent();
4086 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
4087 .
addImm(Subtarget->getWavefrontSizeLog2())
4092 .
addImm(Subtarget->getWavefrontSizeLog2())
4096 const TargetRegisterClass &RC =
4097 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4098 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4101 MI.eraseFromParent();
4105bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4108 MachineBasicBlock *
MBB =
MI.getParent();
4115 const LLT DstTy = MRI->getType(DstReg);
4117 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4118 const TargetRegisterClass *DstRC =
4119 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4124 if (!Subtarget->supportsBPermute())
4128 if (Subtarget->supportsWaveWideBPermute()) {
4129 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4130 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4140 assert(Subtarget->isWave64());
4144 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4145 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4147 Register UndefExecReg = MRI->createVirtualRegister(
4148 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4149 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4151 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4152 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4160 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4161 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4165 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4166 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4174 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4175 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4180 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4181 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4184 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4185 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4190 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4191 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4198 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4199 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4203 Register XORReg = MRI->createVirtualRegister(DstRC);
4208 Register ANDReg = MRI->createVirtualRegister(DstRC);
4213 Register CompareReg = MRI->createVirtualRegister(
4214 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4215 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4220 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4228 MI.eraseFromParent();
4237 unsigned NumOpcodes = 0;
4250 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4261 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4275 if (Src.size() == 3) {
4282 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4283 if (Src[
I] ==
LHS) {
4293 Bits = SrcBits[Src.size()];
4299 switch (
MI->getOpcode()) {
4300 case TargetOpcode::G_AND:
4301 case TargetOpcode::G_OR:
4302 case TargetOpcode::G_XOR: {
4307 if (!getOperandBits(
LHS, LHSBits) ||
4308 !getOperandBits(
RHS, RHSBits)) {
4309 Src = std::move(Backup);
4310 return std::make_pair(0, 0);
4331 uint8_t LHSBitsOrig = LHSBits;
4332 uint8_t RHSBitsOrig = RHSBits;
4336 NumOpcodes += LHSOp.first;
4337 LHSBits = LHSOp.second;
4344 NumOpcodes += RHSOp.first;
4345 RHSBits = RHSOp.second;
4349 auto dependsOnSlot = [](
uint8_t TT,
int Slot) ->
bool {
4350 if (Slot < 0 || Slot > 2)
4352 const uint8_t Masks[3] = {0x0f, 0x33, 0x55};
4353 const int Shifts[3] = {4, 2, 1};
4354 return ((TT ^ (TT >> Shifts[Slot])) & Masks[Slot]) != 0;
4360 const uint8_t SrcBitsConst[3] = {0xf0, 0xcc, 0xaa};
4367 for (
int I = 0;
I < (int)S.size();
I++) {
4368 if (Bits == SrcBitsConst[
I] && S[
I] ==
Op)
4370 if (IsNegationOp && Bits == (
uint8_t)~SrcBitsConst[
I] &&
4371 S[
I] == NegatedInner)
4382 for (
int I = 0;
I < (int)SrcAfterLHS.
size() &&
I < 3;
I++) {
4383 if (
I < (
int)Src.size() && Src[
I] != SrcAfterLHS[
I] &&
4384 dependsOnSlot(LHSBits,
I)) {
4393 if (!Stale && !RHSOp.first) {
4394 int Slot = findSlot(RHSBitsOrig,
RHS, SrcBeforeRecurse);
4396 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4402 if (!Stale && !LHSOp.first) {
4403 int Slot = findSlot(LHSBitsOrig,
LHS, SrcBeforeRecurse);
4405 (Slot >= (
int)Src.size() || Src[Slot] != SrcBeforeRecurse[Slot]))
4410 Src = std::move(SrcBeforeRecurse);
4411 LHSBits = LHSBitsOrig;
4412 RHSBits = RHSBitsOrig;
4418 return std::make_pair(0, 0);
4422 switch (
MI->getOpcode()) {
4423 case TargetOpcode::G_AND:
4424 TTbl = LHSBits & RHSBits;
4426 case TargetOpcode::G_OR:
4427 TTbl = LHSBits | RHSBits;
4429 case TargetOpcode::G_XOR:
4430 TTbl = LHSBits ^ RHSBits;
4436 return std::make_pair(NumOpcodes + 1, TTbl);
4439bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4440 if (!Subtarget->hasBitOp3Insts())
4444 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4445 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4451 unsigned NumOpcodes;
4453 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4457 if (NumOpcodes < 2 || Src.empty())
4460 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4461 if (NumOpcodes == 2 && IsB32) {
4469 }
else if (NumOpcodes < 4) {
4476 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4477 if (!IsB32 && STI.hasTrue16BitInsts())
4478 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4479 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4480 unsigned CBL = STI.getConstantBusLimit(
Opc);
4481 MachineBasicBlock *
MBB =
MI.getParent();
4484 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4485 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4486 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4492 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4503 while (Src.size() < 3)
4504 Src.push_back(Src[0]);
4521 MI.eraseFromParent();
4526bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4528 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4531 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4533 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4535 MachineBasicBlock *
MBB =
MI.getParent();
4539 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4542 .
addImm(Subtarget->getWavefrontSizeLog2())
4549 MI.eraseFromParent();
4555 if (!
I.isPreISelOpcode()) {
4557 return selectCOPY(
I);
4561 switch (
I.getOpcode()) {
4562 case TargetOpcode::G_AND:
4563 case TargetOpcode::G_OR:
4564 case TargetOpcode::G_XOR:
4565 if (selectBITOP3(
I))
4569 return selectG_AND_OR_XOR(
I);
4570 case TargetOpcode::G_ADD:
4571 case TargetOpcode::G_SUB:
4572 case TargetOpcode::G_PTR_ADD:
4575 return selectG_ADD_SUB(
I);
4576 case TargetOpcode::G_UADDO:
4577 case TargetOpcode::G_USUBO:
4578 case TargetOpcode::G_UADDE:
4579 case TargetOpcode::G_USUBE:
4580 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4581 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4582 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4583 return selectG_AMDGPU_MAD_64_32(
I);
4584 case TargetOpcode::G_INTTOPTR:
4585 case TargetOpcode::G_BITCAST:
4586 case TargetOpcode::G_PTRTOINT:
4587 case TargetOpcode::G_FREEZE:
4588 return selectCOPY(
I);
4589 case TargetOpcode::G_FNEG:
4592 return selectG_FNEG(
I);
4593 case TargetOpcode::G_FABS:
4596 return selectG_FABS(
I);
4597 case TargetOpcode::G_EXTRACT:
4598 return selectG_EXTRACT(
I);
4599 case TargetOpcode::G_MERGE_VALUES:
4600 case TargetOpcode::G_CONCAT_VECTORS:
4601 return selectG_MERGE_VALUES(
I);
4602 case TargetOpcode::G_UNMERGE_VALUES:
4603 return selectG_UNMERGE_VALUES(
I);
4604 case TargetOpcode::G_BUILD_VECTOR:
4605 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4606 return selectG_BUILD_VECTOR(
I);
4607 case TargetOpcode::G_IMPLICIT_DEF:
4608 return selectG_IMPLICIT_DEF(
I);
4609 case TargetOpcode::G_INSERT:
4610 return selectG_INSERT(
I);
4611 case TargetOpcode::G_INTRINSIC:
4612 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4613 return selectG_INTRINSIC(
I);
4614 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4615 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4616 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4617 case TargetOpcode::G_ICMP:
4618 case TargetOpcode::G_FCMP:
4619 if (selectG_ICMP_or_FCMP(
I))
4622 case TargetOpcode::G_LOAD:
4623 case TargetOpcode::G_ZEXTLOAD:
4624 case TargetOpcode::G_SEXTLOAD:
4625 case TargetOpcode::G_STORE:
4626 case TargetOpcode::G_ATOMIC_CMPXCHG:
4627 case TargetOpcode::G_ATOMICRMW_XCHG:
4628 case TargetOpcode::G_ATOMICRMW_ADD:
4629 case TargetOpcode::G_ATOMICRMW_SUB:
4630 case TargetOpcode::G_ATOMICRMW_AND:
4631 case TargetOpcode::G_ATOMICRMW_OR:
4632 case TargetOpcode::G_ATOMICRMW_XOR:
4633 case TargetOpcode::G_ATOMICRMW_MIN:
4634 case TargetOpcode::G_ATOMICRMW_MAX:
4635 case TargetOpcode::G_ATOMICRMW_UMIN:
4636 case TargetOpcode::G_ATOMICRMW_UMAX:
4637 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4638 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4639 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4640 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4641 case TargetOpcode::G_ATOMICRMW_FADD:
4642 case TargetOpcode::G_ATOMICRMW_FMIN:
4643 case TargetOpcode::G_ATOMICRMW_FMAX:
4644 return selectG_LOAD_STORE_ATOMICRMW(
I);
4645 case TargetOpcode::G_SELECT:
4646 return selectG_SELECT(
I);
4647 case TargetOpcode::G_TRUNC:
4648 return selectG_TRUNC(
I);
4649 case TargetOpcode::G_SEXT:
4650 case TargetOpcode::G_ZEXT:
4651 case TargetOpcode::G_ANYEXT:
4652 case TargetOpcode::G_SEXT_INREG:
4656 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4659 return selectG_SZA_EXT(
I);
4660 case TargetOpcode::G_FPEXT:
4661 if (selectG_FPEXT(
I))
4664 case TargetOpcode::G_BRCOND:
4665 return selectG_BRCOND(
I);
4666 case TargetOpcode::G_GLOBAL_VALUE:
4667 return selectG_GLOBAL_VALUE(
I);
4668 case TargetOpcode::G_PTRMASK:
4669 return selectG_PTRMASK(
I);
4670 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4671 return selectG_EXTRACT_VECTOR_ELT(
I);
4672 case TargetOpcode::G_INSERT_VECTOR_ELT:
4673 return selectG_INSERT_VECTOR_ELT(
I);
4674 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4675 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4676 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4677 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4678 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4681 assert(Intr &&
"not an image intrinsic with image pseudo");
4682 return selectImageIntrinsic(
I, Intr);
4684 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4685 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4686 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4687 return selectBVHIntersectRayIntrinsic(
I);
4688 case AMDGPU::G_SBFX:
4689 case AMDGPU::G_UBFX:
4690 return selectG_SBFX_UBFX(
I);
4691 case AMDGPU::G_SI_CALL:
4692 I.setDesc(TII.get(AMDGPU::SI_CALL));
4694 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4695 return selectWaveAddress(
I);
4696 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4697 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4700 case AMDGPU::G_STACKRESTORE:
4701 return selectStackRestore(
I);
4703 return selectPHI(
I);
4704 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4705 return selectCOPY_SCC_VCC(
I);
4706 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4707 return selectCOPY_VCC_SCC(
I);
4708 case AMDGPU::G_AMDGPU_READANYLANE:
4709 return selectReadAnyLane(
I);
4710 case TargetOpcode::G_CONSTANT:
4711 case TargetOpcode::G_FCONSTANT:
4719AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4726std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4727 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4731 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4732 Src =
MI->getOperand(1).getReg();
4735 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4740 if (
LHS &&
LHS->isZero()) {
4742 Src =
MI->getOperand(2).getReg();
4746 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4747 Src =
MI->getOperand(1).getReg();
4754 return std::pair(Src, Mods);
4757std::pair<Register, unsigned>
4758AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4760 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4762 return std::pair(Src, Mods);
4765Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4767 bool ForceVGPR)
const {
4768 if ((Mods != 0 || ForceVGPR) &&
4769 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4776 TII.
get(AMDGPU::COPY), VGPRSrc)
4788AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4790 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4795AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4798 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4801 [=](MachineInstrBuilder &MIB) {
4802 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4804 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4805 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4806 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4811AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4814 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4819 [=](MachineInstrBuilder &MIB) {
4820 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4822 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4823 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4824 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4829AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4831 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4832 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4833 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4838AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4841 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4844 [=](MachineInstrBuilder &MIB) {
4845 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4847 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4852AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4856 std::tie(Src, Mods) =
4857 selectVOP3ModsImpl(Root.
getReg(),
false);
4860 [=](MachineInstrBuilder &MIB) {
4861 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4863 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4868AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4871 std::tie(Src, Mods) =
4872 selectVOP3ModsImpl(Root.
getReg(),
true,
4876 [=](MachineInstrBuilder &MIB) {
4877 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4879 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4884AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4887 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4890 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4915 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4920 return DstSize * 2 == SrcSize;
4926 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4930 std::optional<ValueAndVReg> ShiftAmt;
4931 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4934 unsigned Shift = ShiftAmt->Value.getZExtValue();
4935 return Shift * 2 == SrcSize;
4943 if (
MI->getOpcode() != AMDGPU::G_SHL)
4947 std::optional<ValueAndVReg> ShiftAmt;
4948 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4951 unsigned Shift = ShiftAmt->Value.getZExtValue();
4952 return Shift * 2 == SrcSize;
4960 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4962 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4963 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
5133static std::optional<std::pair<Register, SrcStatus>>
5138 unsigned Opc =
MI->getOpcode();
5142 case AMDGPU::G_BITCAST:
5143 return std::optional<std::pair<Register, SrcStatus>>(
5144 {
MI->getOperand(1).getReg(), Curr.second});
5146 if (
MI->getOperand(1).getReg().isPhysical())
5147 return std::nullopt;
5148 return std::optional<std::pair<Register, SrcStatus>>(
5149 {
MI->getOperand(1).getReg(), Curr.second});
5150 case AMDGPU::G_FNEG: {
5153 return std::nullopt;
5154 return std::optional<std::pair<Register, SrcStatus>>(
5155 {
MI->getOperand(1).getReg(), Stat});
5162 switch (Curr.second) {
5165 return std::optional<std::pair<Register, SrcStatus>>(
5168 if (Curr.first ==
MI->getOperand(0).getReg())
5169 return std::optional<std::pair<Register, SrcStatus>>(
5171 return std::optional<std::pair<Register, SrcStatus>>(
5183 return std::optional<std::pair<Register, SrcStatus>>(
5187 if (Curr.first ==
MI->getOperand(0).getReg())
5188 return std::optional<std::pair<Register, SrcStatus>>(
5190 return std::optional<std::pair<Register, SrcStatus>>(
5196 return std::optional<std::pair<Register, SrcStatus>>(
5201 return std::optional<std::pair<Register, SrcStatus>>(
5206 return std::optional<std::pair<Register, SrcStatus>>(
5211 return std::optional<std::pair<Register, SrcStatus>>(
5217 return std::nullopt;
5227 bool HasNeg =
false;
5229 bool HasOpsel =
true;
5234 unsigned Opc =
MI->getOpcode();
5236 if (
Opc == TargetOpcode::G_INTRINSIC) {
5239 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5266 while (
Depth <= MaxDepth && Curr.has_value()) {
5269 Statlist.push_back(Curr.value());
5276static std::pair<Register, SrcStatus>
5283 while (
Depth <= MaxDepth && Curr.has_value()) {
5289 LastSameOrNeg = Curr.value();
5294 return LastSameOrNeg;
5301 return Width1 == Width2;
5336 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5337 IsHalfState(HiStat);
5340std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5346 return {RootReg, Mods};
5349 SearchOptions SO(RootReg, MRI);
5362 if (MRI.getType(RootReg).getSizeInBits() == 128) {
5364 return {Stat.first, Mods};
5367 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5369 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5370 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5372 return {Stat.first, Mods};
5378 if (StatlistHi.
empty()) {
5380 return {Stat.first, Mods};
5386 if (StatlistLo.
empty()) {
5388 return {Stat.first, Mods};
5391 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5392 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5393 if (StatlistHi[
I].first == StatlistLo[J].first &&
5395 StatlistHi[
I].first, RootReg, TII, MRI))
5396 return {StatlistHi[
I].first,
5397 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5403 return {Stat.first, Mods};
5413 return RB->
getID() == RBNo;
5430 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5431 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5435 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5444 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5452AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5457 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5461 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5462 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5467AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5469 return selectVOP3PRetHelper(Root);
5473AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5475 return selectVOP3PRetHelper(Root,
true);
5479AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5483 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5487 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5491AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5494 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5497 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5498 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5503AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5506 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5510 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5514AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5517 "expected i1 value");
5523 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5531 switch (Elts.
size()) {
5533 DstRegClass = &AMDGPU::VReg_256RegClass;
5536 DstRegClass = &AMDGPU::VReg_128RegClass;
5539 DstRegClass = &AMDGPU::VReg_64RegClass;
5546 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5548 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5559 if (ModOpcode == TargetOpcode::G_FNEG) {
5563 for (
auto El : Elts) {
5569 if (Elts.size() != NegAbsElts.
size()) {
5578 assert(ModOpcode == TargetOpcode::G_FABS);
5586AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5592 assert(BV->getNumSources() > 0);
5594 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5595 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5598 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5599 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5606 if (BV->getNumSources() == EltsF32.
size()) {
5612 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5613 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5617AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5623 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5631 if (CV->getNumSources() == EltsV2F16.
size()) {
5638 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5639 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5643AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5649 assert(CV->getNumSources() > 0);
5650 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5652 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5656 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5657 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5664 if (CV->getNumSources() == EltsV2F16.
size()) {
5671 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5672 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5676AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5677 std::optional<FPValueAndVReg> FPValReg;
5679 if (TII.isInlineConstant(FPValReg->Value)) {
5680 return {{[=](MachineInstrBuilder &MIB) {
5681 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5691 if (TII.isInlineConstant(ICst)) {
5701AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5707 std::optional<ValueAndVReg> ShiftAmt;
5709 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5710 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5711 Key = ShiftAmt->Value.getZExtValue() / 8;
5716 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5717 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5722AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5729 std::optional<ValueAndVReg> ShiftAmt;
5731 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5732 ShiftAmt->Value.getZExtValue() == 16) {
5738 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5739 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5744AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5751 S32 = matchAnyExtendFromS32(Src);
5755 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5760 Src =
Def->getOperand(2).getReg();
5767 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5768 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5773AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5776 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5780 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5781 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5787AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5790 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5796 [=](MachineInstrBuilder &MIB) {
5798 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5800 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5805AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5808 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5814 [=](MachineInstrBuilder &MIB) {
5816 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5818 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5825bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5827 bool IsSigned)
const {
5828 if (!Subtarget->hasScaleOffset())
5832 MachineMemOperand *MMO = *
MI.memoperands_begin();
5844 OffsetReg =
Def->Reg;
5859 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5863 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5864 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5865 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5866 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5879bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5883 bool *ScaleOffset)
const {
5885 MachineBasicBlock *
MBB =
MI->getParent();
5890 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5892 if (AddrInfo.
empty())
5895 const GEPInfo &GEPI = AddrInfo[0];
5896 std::optional<int64_t> EncodedImm;
5899 *ScaleOffset =
false;
5904 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5905 AddrInfo.
size() > 1) {
5906 const GEPInfo &GEPI2 = AddrInfo[1];
5907 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5908 Register OffsetReg = GEPI2.SgprParts[1];
5911 selectScaleOffset(Root, OffsetReg,
false );
5912 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5914 Base = GEPI2.SgprParts[0];
5915 *SOffset = OffsetReg;
5924 auto SKnown =
VT->getKnownBits(*SOffset);
5925 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5937 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5938 Base = GEPI.SgprParts[0];
5944 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5950 Base = GEPI.SgprParts[0];
5951 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5952 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5957 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5958 Register OffsetReg = GEPI.SgprParts[1];
5960 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5961 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5963 Base = GEPI.SgprParts[0];
5964 *SOffset = OffsetReg;
5973AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5976 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5978 return std::nullopt;
5980 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5981 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5985AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5987 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5989 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5990 return std::nullopt;
5992 const GEPInfo &GEPInfo = AddrInfo[0];
5993 Register PtrReg = GEPInfo.SgprParts[0];
5994 std::optional<int64_t> EncodedImm =
5997 return std::nullopt;
6000 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
6001 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
6006AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
6009 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
6011 return std::nullopt;
6014 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
6015 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6016 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
6020AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
6024 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
6025 return std::nullopt;
6028 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
6029 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6031 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
6034std::pair<Register, int> AMDGPUInstructionSelector::selectFlatOffsetImpl(
6040 if (!STI.hasFlatInstOffsets())
6044 int64_t ConstOffset;
6046 std::tie(PtrBase, ConstOffset, IsInBounds) =
6047 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6053 if (ConstOffset == 0 ||
6055 !isFlatScratchBaseLegal(Root.
getReg())) ||
6059 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
6060 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
6063 return std::pair(PtrBase, ConstOffset);
6067AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
6071 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6072 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6077AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
6078 auto PtrWithOffset =
6082 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6083 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6088AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
6089 auto PtrWithOffset =
6093 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
6094 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6100AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
6102 bool NeedIOffset)
const {
6105 int64_t ConstOffset;
6106 int64_t ImmOffset = 0;
6110 std::tie(PtrBase, ConstOffset, std::ignore) =
6111 getPtrBaseWithConstantOffset(Addr, *MRI);
6113 if (ConstOffset != 0) {
6118 ImmOffset = ConstOffset;
6121 if (isSGPR(PtrBaseDef->Reg)) {
6122 if (ConstOffset > 0) {
6128 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
6130 std::tie(SplitImmOffset, RemainderOffset) =
6135 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
6138 MachineBasicBlock *
MBB =
MI->getParent();
6140 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6142 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6144 .
addImm(RemainderOffset);
6148 [=](MachineInstrBuilder &MIB) {
6151 [=](MachineInstrBuilder &MIB) {
6154 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
6155 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6158 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
6159 [=](MachineInstrBuilder &MIB) {
6162 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6172 unsigned NumLiterals =
6173 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
6174 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
6175 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
6176 return std::nullopt;
6183 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6188 if (isSGPR(SAddr)) {
6189 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6193 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6194 Subtarget->hasSignedGVSOffset());
6195 if (
Register VOffset = matchExtendFromS32OrS32(
6196 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6198 return {{[=](MachineInstrBuilder &MIB) {
6201 [=](MachineInstrBuilder &MIB) {
6204 [=](MachineInstrBuilder &MIB) {
6207 [=](MachineInstrBuilder &MIB) {
6211 return {{[=](MachineInstrBuilder &MIB) {
6214 [=](MachineInstrBuilder &MIB) {
6217 [=](MachineInstrBuilder &MIB) {
6227 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6228 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6229 return std::nullopt;
6234 MachineBasicBlock *
MBB =
MI->getParent();
6235 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6237 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6242 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6243 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6244 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6245 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6248 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6249 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6250 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6255AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6256 return selectGlobalSAddr(Root, 0);
6260AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6266 return selectGlobalSAddr(Root, PassedCPol);
6270AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6276 return selectGlobalSAddr(Root, PassedCPol);
6280AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6285AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6292 return selectGlobalSAddr(Root, PassedCPol,
false);
6296AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6303 return selectGlobalSAddr(Root, PassedCPol,
false);
6307AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6310 int64_t ConstOffset;
6311 int64_t ImmOffset = 0;
6315 std::tie(PtrBase, ConstOffset, std::ignore) =
6316 getPtrBaseWithConstantOffset(Addr, *MRI);
6318 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6322 ImmOffset = ConstOffset;
6326 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6327 int FI = AddrDef->MI->getOperand(1).
getIndex();
6330 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6336 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6337 Register LHS = AddrDef->MI->getOperand(1).getReg();
6338 Register RHS = AddrDef->MI->getOperand(2).getReg();
6342 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6343 isSGPR(RHSDef->Reg)) {
6344 int FI = LHSDef->MI->getOperand(1).getIndex();
6348 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6350 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6358 return std::nullopt;
6361 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6362 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6367bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6369 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6375 auto VKnown =
VT->getKnownBits(VAddr);
6378 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6379 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6380 return (VMax & 3) + (
SMax & 3) >= 4;
6384AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6387 int64_t ConstOffset;
6388 int64_t ImmOffset = 0;
6392 std::tie(PtrBase, ConstOffset, std::ignore) =
6393 getPtrBaseWithConstantOffset(Addr, *MRI);
6396 if (ConstOffset != 0 &&
6400 ImmOffset = ConstOffset;
6404 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6405 return std::nullopt;
6407 Register RHS = AddrDef->MI->getOperand(2).getReg();
6408 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6409 return std::nullopt;
6411 Register LHS = AddrDef->MI->getOperand(1).getReg();
6414 if (OrigAddr != Addr) {
6415 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6416 return std::nullopt;
6418 if (!isFlatScratchBaseLegalSV(OrigAddr))
6419 return std::nullopt;
6422 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6423 return std::nullopt;
6425 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6429 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6430 int FI = LHSDef->MI->getOperand(1).getIndex();
6432 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6434 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6435 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6444 return std::nullopt;
6447 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6448 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6449 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6450 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6455AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6457 MachineBasicBlock *
MBB =
MI->getParent();
6459 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6464 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6469 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6473 return {{[=](MachineInstrBuilder &MIB) {
6476 [=](MachineInstrBuilder &MIB) {
6479 [=](MachineInstrBuilder &MIB) {
6484 [=](MachineInstrBuilder &MIB) {
6493 std::optional<int> FI;
6496 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6498 int64_t ConstOffset;
6499 std::tie(PtrBase, ConstOffset, std::ignore) =
6500 getPtrBaseWithConstantOffset(VAddr, *MRI);
6501 if (ConstOffset != 0) {
6502 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6503 (!STI.privateMemoryResourceIsRangeChecked() ||
6504 VT->signBitIsZero(PtrBase))) {
6505 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6506 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6512 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6516 return {{[=](MachineInstrBuilder &MIB) {
6519 [=](MachineInstrBuilder &MIB) {
6525 [=](MachineInstrBuilder &MIB) {
6530 [=](MachineInstrBuilder &MIB) {
6535bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6540 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6545 return VT->signBitIsZero(
Base);
6548bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6550 unsigned Size)
const {
6551 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6556 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6561 return VT->signBitIsZero(
Base);
6566 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6567 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6574bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6582 if (STI.hasSignedScratchOffsets())
6588 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6589 std::optional<ValueAndVReg> RhsValReg =
6595 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6596 RhsValReg->Value.getSExtValue() > -0x40000000)
6600 return VT->signBitIsZero(
LHS);
6605bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6613 if (STI.hasSignedScratchOffsets())
6618 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6623bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6627 if (STI.hasSignedScratchOffsets())
6632 std::optional<DefinitionAndSourceRegister> BaseDef =
6634 std::optional<ValueAndVReg> RHSOffset =
6644 (RHSOffset->Value.getSExtValue() < 0 &&
6645 RHSOffset->Value.getSExtValue() > -0x40000000)))
6648 Register LHS = BaseDef->MI->getOperand(1).getReg();
6649 Register RHS = BaseDef->MI->getOperand(2).getReg();
6650 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6653bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6654 unsigned ShAmtBits)
const {
6655 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6657 std::optional<APInt>
RHS =
6662 if (
RHS->countr_one() >= ShAmtBits)
6665 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6666 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6670AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6673 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6675 std::optional<DefinitionAndSourceRegister>
Def =
6677 assert(Def &&
"this shouldn't be an optional result");
6682 [=](MachineInstrBuilder &MIB) {
6685 [=](MachineInstrBuilder &MIB) {
6688 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6699 if (!TII.isLegalMUBUFImmOffset(
Offset))
6707 [=](MachineInstrBuilder &MIB) {
6710 [=](MachineInstrBuilder &MIB) {
6718 !TII.isLegalMUBUFImmOffset(
Offset))
6722 [=](MachineInstrBuilder &MIB) {
6725 [=](MachineInstrBuilder &MIB) {
6732std::pair<Register, unsigned>
6733AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6734 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6735 int64_t ConstAddr = 0;
6739 std::tie(PtrBase,
Offset, std::ignore) =
6740 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6743 if (isDSOffsetLegal(PtrBase,
Offset)) {
6745 return std::pair(PtrBase,
Offset);
6747 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6756 return std::pair(Root.
getReg(), 0);
6760AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6763 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6765 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6771AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6772 return selectDSReadWrite2(Root, 4);
6776AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6777 return selectDSReadWrite2(Root, 8);
6781AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6782 unsigned Size)
const {
6787 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6789 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6793std::pair<Register, unsigned>
6794AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6795 unsigned Size)
const {
6796 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6797 int64_t ConstAddr = 0;
6801 std::tie(PtrBase,
Offset, std::ignore) =
6802 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6805 int64_t OffsetValue0 =
Offset;
6807 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6809 return std::pair(PtrBase, OffsetValue0 /
Size);
6811 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6819 return std::pair(Root.
getReg(), 0);
6827std::tuple<Register, int64_t, bool>
6828AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6831 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6832 return {Root, 0,
false};
6835 std::optional<ValueAndVReg> MaybeOffset =
6838 return {Root, 0,
false};
6858 B.buildInstr(AMDGPU::S_MOV_B32)
6861 B.buildInstr(AMDGPU::S_MOV_B32)
6868 B.buildInstr(AMDGPU::REG_SEQUENCE)
6871 .addImm(AMDGPU::sub0)
6873 .addImm(AMDGPU::sub1);
6878 B.buildInstr(AMDGPU::S_MOV_B64)
6883 B.buildInstr(AMDGPU::REG_SEQUENCE)
6886 .addImm(AMDGPU::sub0_sub1)
6888 .addImm(AMDGPU::sub2_sub3);
6895 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6904 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6911AMDGPUInstructionSelector::MUBUFAddressData
6912AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6913 MUBUFAddressData
Data;
6919 std::tie(PtrBase,
Offset, std::ignore) =
6920 getPtrBaseWithConstantOffset(Src, *MRI);
6926 if (MachineInstr *InputAdd
6928 Data.N2 = InputAdd->getOperand(1).getReg();
6929 Data.N3 = InputAdd->getOperand(2).getReg();
6944bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6950 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6951 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6957void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6959 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6963 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6964 B.buildInstr(AMDGPU::S_MOV_B32)
6970bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6975 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6978 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6979 if (!shouldUseAddr64(AddrData))
6985 Offset = AddrData.Offset;
6991 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6993 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
7006 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
7017 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
7021bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
7026 if (STI.useFlatForGlobal())
7029 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
7030 if (shouldUseAddr64(AddrData))
7036 Offset = AddrData.Offset;
7042 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
7047AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
7053 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
7059 [=](MachineInstrBuilder &MIB) {
7062 [=](MachineInstrBuilder &MIB) {
7065 [=](MachineInstrBuilder &MIB) {
7068 else if (STI.hasRestrictedSOffset())
7069 MIB.
addReg(AMDGPU::SGPR_NULL);
7073 [=](MachineInstrBuilder &MIB) {
7083AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
7088 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
7092 [=](MachineInstrBuilder &MIB) {
7095 [=](MachineInstrBuilder &MIB) {
7098 else if (STI.hasRestrictedSOffset())
7099 MIB.
addReg(AMDGPU::SGPR_NULL);
7111AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
7116 SOffset = AMDGPU::SGPR_NULL;
7118 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
7122static std::optional<uint64_t>
7126 if (!OffsetVal || !
isInt<32>(*OffsetVal))
7127 return std::nullopt;
7128 return Lo_32(*OffsetVal);
7132AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
7133 std::optional<uint64_t> OffsetVal =
7138 std::optional<int64_t> EncodedImm =
7143 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7147AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
7154 std::optional<int64_t> EncodedImm =
7159 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7163AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
7171 return std::nullopt;
7173 std::optional<int64_t> EncodedOffset =
7176 return std::nullopt;
7179 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
7180 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
7183std::pair<Register, unsigned>
7184AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
7185 bool &Matched)
const {
7190 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
7200 const auto CheckAbsNeg = [&]() {
7205 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
7236AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
7241 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7246 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7247 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7252AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
7256 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7259 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7260 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7264bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
7268 Register CCReg =
I.getOperand(0).getReg();
7273 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
7274 .
addImm(
I.getOperand(2).getImm());
7278 I.eraseFromParent();
7279 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
7283bool AMDGPUInstructionSelector::selectSGetBarrierState(
7287 const MachineOperand &BarOp =
I.getOperand(2);
7288 std::optional<int64_t> BarValImm =
7292 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7296 MachineInstrBuilder MIB;
7297 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
7298 : AMDGPU::S_GET_BARRIER_STATE_M0;
7301 auto DstReg =
I.getOperand(0).getReg();
7302 const TargetRegisterClass *DstRC =
7303 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7304 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7310 I.eraseFromParent();
7315 if (HasInlineConst) {
7319 case Intrinsic::amdgcn_s_barrier_join:
7320 return AMDGPU::S_BARRIER_JOIN_IMM;
7321 case Intrinsic::amdgcn_s_wakeup_barrier:
7322 return AMDGPU::S_WAKEUP_BARRIER_IMM;
7323 case Intrinsic::amdgcn_s_get_named_barrier_state:
7324 return AMDGPU::S_GET_BARRIER_STATE_IMM;
7330 case Intrinsic::amdgcn_s_barrier_join:
7331 return AMDGPU::S_BARRIER_JOIN_M0;
7332 case Intrinsic::amdgcn_s_wakeup_barrier:
7333 return AMDGPU::S_WAKEUP_BARRIER_M0;
7334 case Intrinsic::amdgcn_s_get_named_barrier_state:
7335 return AMDGPU::S_GET_BARRIER_STATE_M0;
7340bool AMDGPUInstructionSelector::selectNamedBarrierInit(
7344 const MachineOperand &BarOp =
I.getOperand(1);
7345 const MachineOperand &CntOp =
I.getOperand(2);
7349 if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
7350 std::optional<int64_t> CntImm =
7352 if (CntImm && *CntImm == 0) {
7353 std::optional<int64_t> BarValImm =
7356 auto BarID = ((*BarValImm) >> 4) & 0x3F;
7357 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
7359 I.eraseFromParent();
7366 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7372 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7379 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7385 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7386 constexpr unsigned ShAmt = 16;
7392 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7402 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7403 ? AMDGPU::S_BARRIER_INIT_M0
7404 : AMDGPU::S_BARRIER_SIGNAL_M0;
7405 MachineInstrBuilder MIB;
7408 I.eraseFromParent();
7412bool AMDGPUInstructionSelector::selectNamedBarrierInst(
7416 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7419 std::optional<int64_t> BarValImm =
7424 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7430 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7436 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7441 MachineInstrBuilder MIB;
7445 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7446 auto DstReg =
I.getOperand(0).getReg();
7447 const TargetRegisterClass *DstRC =
7448 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7449 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7455 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7459 I.eraseFromParent();
7466 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7467 "Expected G_CONSTANT");
7468 MIB.
addImm(
MI.getOperand(1).getCImm()->getSExtValue());
7474 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7475 "Expected G_CONSTANT");
7476 MIB.
addImm(-
MI.getOperand(1).getCImm()->getSExtValue());
7482 const MachineOperand &
Op =
MI.getOperand(1);
7483 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT &&
OpIdx == -1);
7484 MIB.
addImm(
Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7487void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
7489 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7490 "Expected G_CONSTANT");
7491 MIB.
addImm(
MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
7499 const MachineOperand &
Op =
MI.getOperand(
OpIdx);
7516 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7520void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7522 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7527void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7529 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7535void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7537 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7542void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7544 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7550void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7552 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7557void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7559 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7564void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7566 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7571void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7573 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7582 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7591 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7598void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7600 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7601 const uint32_t Cpol =
MI.getOperand(
OpIdx).getImm() &
7616 const APFloat &APF =
MI.getOperand(1).getFPImm()->getValueAPF();
7618 assert(ExpVal != INT_MIN);
7636 if (
MI.getOperand(
OpIdx).getImm())
7638 MIB.
addImm((int64_t)Mods);
7645 if (
MI.getOperand(
OpIdx).getImm())
7647 MIB.
addImm((int64_t)Mods);
7653 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7661 MIB.
addImm((int64_t)Mods);
7667 uint32_t
V =
MI.getOperand(2).getImm();
7670 if (!Subtarget->hasSafeCUPrefetch())
7676void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7678 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7687bool AMDGPUInstructionSelector::isInlineImmediate(
const APInt &Imm)
const {
7688 return TII.isInlineConstant(Imm);
7691bool AMDGPUInstructionSelector::isInlineImmediate(
const APFloat &Imm)
const {
7692 return TII.isInlineConstant(Imm);
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static Register getLegalRegBank(Register NewReg, Register RootReg, const AMDGPURegisterBankInfo &RBI, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const SIInstrInfo &TII)
static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is shift left with half bits, such as reg0:2n =G_SHL reg1:2n, CONST(n)
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
static bool checkRB(Register Reg, unsigned int RBNo, const AMDGPURegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods)
static bool isTruncHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is truncating to half, such as reg0:n = G_TRUNC reg1:2n
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static TypeClass isVectorOfTwoOrScalar(Register Reg, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static bool isSameBitWidth(Register Reg1, Register Reg2, const MachineRegisterInfo &MRI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static bool isAsyncLDSDMA(Intrinsic::ID Intr)
static void diagnoseUnsupportedIntrinsic(const MachineInstr &I)
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelValueTracking &ValueTracking)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static std::pair< Register, SrcStatus > getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static std::optional< std::pair< Register, SrcStatus > > calcNextStatus(std::pair< Register, SrcStatus > Curr, const MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg, Register RootReg, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static SmallVector< std::pair< Register, SrcStatus > > getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static bool isUnmergeHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is reg0:n, reg1:n = G_UNMERGE_VALUES reg2:2n
static SrcStatus getNegStatus(Register Reg, SrcStatus S, const MachineRegisterInfo &MRI)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is logic shift right with half bits, such as reg0:2n =G_LSHR reg1:2n,...
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool isAllZeros(StringRef Arr)
Return true if the array is empty or all zeros.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
Machine Check Debug Module
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
This is used to control valid status that current MI supports.
bool checkOptions(SrcStatus Stat) const
SearchOptions(Register Reg, const MachineRegisterInfo &MRI)
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelValueTracking *VT, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
DILocation * get() const
Get the underlying DILocation.
Diagnostic information for unsupported feature in backend.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
TypeSize getValue() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setReturnAddressIsTaken(bool s)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis providing profile information.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool isGenericOpcode(unsigned Opc)
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getGlobalSaddrOp(uint32_t Opcode)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
SpecificRegisterMatch m_SpecificReg(Register RequestedReg)
Matches a register only if it is equal to RequestedReg.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, TargetOpcode::G_MUL, true > m_GMul(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
NodeAddr< DefNode * > Def
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
FunctionAddr VTableAddr Value
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, returns it.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr RegState getUndefRegState(bool B)
@ Default
The result value is uniform if and only if all operands are uniform.
unsigned AtomicNoRetBaseOpcode
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.