#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),

#include "AMDGPUGenGlobalISel.inc"

#include "AMDGPUGenGlobalISel.inc"

  MRI = &MF.getRegInfo();
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =

  const LLT Ty = MRI.getType(Reg);

  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&

  return RB->getID() == AMDGPU::VCCRegBankID;
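// Rewrite a copy-like intrinsic to its target opcode and constrain both
// operands to the same register class.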
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));

  MachineOperand &Dst = MI.getOperand(0);
  MachineOperand &Src = MI.getOperand(1);

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
  const TargetRegisterClass *SrcRC =
      TRI.getConstrainedRegClassForOperand(Src, *MRI);
  if (!DstRC || DstRC != SrcRC)

  return RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) &&
         RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI);
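// Lower a generic COPY. A copy into a VCC-bank boolean from a non-boolean
// source is turned into a compare against zero; otherwise both operands are
// just constrained to the classes implied by their register banks.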
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {

  I.setDesc(TII.get(TargetOpcode::COPY));

  const MachineOperand &Src = I.getOperand(1);
  MachineOperand &Dst = I.getOperand(0);

  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      const TargetRegisterClass *RC =
          TRI.getConstrainedRegClassForOperand(Dst, *MRI);

      return RBI.constrainGenericRegister(DstReg, *RC, *MRI);

    if (!isVCC(SrcReg, *MRI)) {

      if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))

      const TargetRegisterClass *SrcRC =
          TRI.getConstrainedRegClassForOperand(Src, *MRI);

      std::optional<ValueAndVReg> ConstVal =

            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);

        Register MaskedReg = MRI->createVirtualRegister(SrcRC);

          assert(Subtarget->useRealTrue16Insts());
          const int64_t NoMods = 0;
          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)

          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)

          bool IsSGPR = TRI.isSGPRClass(SrcRC);
          unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;

          BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())

    const TargetRegisterClass *RC =
        TRI.getConstrainedRegClassForOperand(MO, *MRI);

    RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const {

  Register VCCReg = I.getOperand(1).getReg();

  if (STI.hasScalarCompareEq64()) {
        STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;

    Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
    Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst)

  Register DstReg = I.getOperand(0).getReg();

  return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
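// Materialize an SCC value into a wave-wide boolean: constant inputs become
// an S_MOV of 0/-1, non-constant inputs go through S_CSELECT.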
bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  std::optional<ValueAndVReg> Arg =

    const int64_t Value = Arg->Value.getZExtValue();

    unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);

  unsigned SelectOpcode =
      STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
bool AMDGPUInstructionSelector::selectReadAnyLane(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();

  auto RFL = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);

      MRI->getRegClassOrRegBank(DefReg);

  const TargetRegisterClass *DefRC =

    DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);

  for (unsigned i = 1; i != I.getNumOperands(); i += 2) {
    const Register SrcReg = I.getOperand(i).getReg();

    const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);

    const LLT SrcTy = MRI->getType(SrcReg);
    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *RB);
    if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
                                                        unsigned SubIdx) const {

  Register DstReg = MRI->createVirtualRegister(&SubRC);

    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);

    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;

    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
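// Map G_AND/G_OR/G_XOR on the SGPR or VCC banks onto the 32- or 64-bit
// scalar logical opcodes.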
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
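// G_ADD/G_SUB: 32-bit values use the scalar or VALU add/sub directly; 64-bit
// values are split into lo/hi halves chained through carry
// (S_ADD_U32/S_ADDC_U32 or V_ADD_CO_U32/V_ADDC_U32).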
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();

  LLT Ty = MRI->getType(DstReg);

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;

          .add(I.getOperand(1))
          .add(I.getOperand(2))

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;

    Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());

        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC =
      IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC =
      IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

    MachineInstr *Addc =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(

  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    CarryInst.setOperandDead(3);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)

    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))

      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
                                    AMDGPU::SReg_32RegClass, *MRI))
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(

  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  bool UseNoCarry = Subtarget->hasMadU64U32NoCarry() &&
                    MRI->use_nodbg_empty(I.getOperand(1).getReg());

  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
                     : AMDGPU::V_MAD_NC_I64_I32_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;

  I.setDesc(TII.get(Opc));

  I.addImplicitDefUseOperands(*MF);
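// G_EXTRACT of a 32-bit-aligned slice is selected as a subregister copy of
// the source.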
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);

                                               *SrcRC, I.getOperand(1));

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();

  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);

  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    MachineOperand &Src = MI.getOperand(I + 1);

    const TargetRegisterClass *SrcRC =
        TRI.getConstrainedRegClassForOperand(Src, *MRI);
    if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  MachineBasicBlock *BB = MI.getParent();
  const int NumDst = MI.getNumOperands() - 1;

  MachineOperand &Src = MI.getOperand(NumDst);

  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
  if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))

    const TargetRegisterClass *DstRC =
        TRI.getConstrainedRegClassForOperand(Dst, *MRI);
    if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))

  MI.eraseFromParent();
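// G_BUILD_VECTOR(_TRUNC) of 16-bit elements: two constant halves fold into a
// single 32-bit immediate move; otherwise the halves are packed with
// V_AND/V_LSHL_OR on VGPRs or the S_PACK_* family on SGPRs.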
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);

  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

  MachineBasicBlock *BB = MI.getParent();

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    uint32_t Lo16 = static_cast<uint32_t>(K0) & 0xffff;
    uint32_t Hi16 = static_cast<uint32_t>(K1) & 0xffff;
    uint32_t Imm = Lo16 | (Hi16 << 16);

      MI.eraseFromParent();
      return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);

    MI.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));

        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
           RBI.constrainGenericRegister(Src0, RC, *MRI);

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)

    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)

    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);

    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

    if (ConstSrc1 && ConstSrc1->Value == 0) {

      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)

      MI.eraseFromParent();

    if (STI.hasSPackHL()) {
      Opc = AMDGPU::S_PACK_HL_B32_B16;
      MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  const MachineOperand &MO = I.getOperand(0);

  const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();

  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)

  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
  if (SubReg == AMDGPU::NoSubRegister)

  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

  const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
  const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
  const TargetRegisterClass *Src0RC =
      TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
  const TargetRegisterClass *Src1RC =
      TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
      !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {

  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  MachineBasicBlock *MBB = MI.getParent();

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;

  MI.eraseFromParent();
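// interp_p1_f16 only takes this manual expansion path on subtargets with a
// 16-bank LDS, where it is split around a V_INTERP_MOV_F32.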
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  if (STI.getLDSBankCount() != 16)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))

  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MachineBasicBlock *MBB = MI.getParent();

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      .addImm(MI.getOperand(3).getImm());

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {

  if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)

  MachineBasicBlock *MBB = MI.getParent();

  Register LaneSelect = MI.getOperand(3).getReg();

  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =

    MIB.addImm(ConstSelect->Value.getSExtValue() &

    std::optional<ValueAndVReg> ConstVal =

                                 STI.hasInv2PiInlineImm())) {
      MIB.addImm(ConstVal->Value.getSExtValue());

      RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);

      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {

  LLT Ty = MRI->getType(Dst0);

    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  MachineBasicBlock *MBB = MI.getParent();

  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
    return selectSMFMACIntrin(I);
  case Intrinsic::amdgcn_permlane16_swap:
  case Intrinsic::amdgcn_permlane32_swap:
    return selectPermlaneSwapIntrin(I, IntrinsicID);
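// Pick the VALU compare opcode for a predicate, choosing between 16-, 32-,
// and 64-bit forms and between true16 and fake16 encodings.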
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned FakeS16Opc, unsigned S32Opc,

    return ST.hasTrue16BitInsts()
               ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
                  AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
                  AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
                  AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
                  AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
                  AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
                  AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
                  AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
                  AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
                  AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
                  AMDGPU::V_CMP_LE_U64_e64);

    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
                  AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
                  AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
                  AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
                  AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
                  AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
                  AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
                  AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
                  AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
                  AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
                  AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
                  AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
                  AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
                  AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
                  AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
                  AMDGPU::V_CMP_F_F64_e64);
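// The scalar compare table follows the same pattern: 64-bit forms are only
// usable when the subtarget has scalar compare-eq-64, and the F16 forms
// require SALU float instructions.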
                                                      unsigned Size) const {

  if (!STI.hasScalarCompareEq64())

    return AMDGPU::S_CMP_LG_U64;
    return AMDGPU::S_CMP_EQ_U64;

    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;

    if (!STI.hasSALUFloatInsts())

    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);

    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)

    RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

  MachineInstrBuilder ICmp;

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3))

    ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
               .add(I.getOperand(2))
               .add(I.getOperand(3));

                               *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))

  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  MachineInstrBuilder SelectedMI;
  MachineOperand &LHS = I.getOperand(2);
  MachineOperand &RHS = I.getOperand(3);
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());

      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);

      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);

    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);

    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);

  I.eraseFromParent();
  if (MI->getParent() != MBB)

  if (MI->getOpcode() == AMDGPU::COPY) {
    auto DstRB = MRI.getRegBankOrNull(MI->getOperand(0).getReg());
    auto SrcRB = MRI.getRegBankOrNull(MI->getOperand(1).getReg());
    if (DstRB && SrcRB && DstRB->getID() == AMDGPU::VCCRegBankID &&
        SrcRB->getID() == AMDGPU::SGPRRegBankID)
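// amdgcn.ballot: a constant source folds to an S_MOV of 0/-1 (or a copy of
// EXEC); otherwise the source is ANDed with EXEC. A 64-bit ballot in wave32
// zero-extends into the high half through a REG_SEQUENCE.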
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(2).getReg();
  const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
  const unsigned WaveSize = STI.getWavefrontSize();

  if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))

  std::optional<ValueAndVReg> Arg =

  if (BallotSize != WaveSize) {
    Dst = MRI->createVirtualRegister(TRI.getBoolRC());

    const int64_t Value = Arg->Value.getZExtValue();

      unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

      if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))

    unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;

  if (BallotSize != WaveSize) {
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
  const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
  if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))

  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  Module *M = MF->getFunction().getParent();
  const MDNode *Metadata = I.getOperand(2).getMetadata();

          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

    const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();

    Module *M = MF->getFunction().getParent();
    const GlobalValue *GV =

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {

  MachineOperand &Dst = I.getOperand(0);

  unsigned Depth = I.getOperand(2).getImm();

  const TargetRegisterClass *RC =
      TRI.getConstrainedRegClassForOperand(Dst, *MRI);
      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))

      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {

    I.eraseFromParent();

  MachineFrameInfo &MFI = MF.getFrameInfo();

  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
                                             AMDGPU::SReg_64RegClass, DL);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {

  MachineBasicBlock *BB = MI.getParent();
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease) {
        Fn, "ds_ordered_count: wave_done requires wave_release", DL));

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          Fn, "ds_ordered_count: dword count must be between 1 and 4", DL));

        Fn, "ds_ordered_count: bad index operand", DL));

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;

    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  MachineInstrBuilder DS =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)

  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,

  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  MachineBasicBlock *MBB = MI.getParent();

  MachineInstr *Readfirstlane = nullptr;

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

    std::tie(BaseOffset, ImmOffset) =

    if (Readfirstlane) {

      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))

      if (!RBI.constrainGenericRegister(BaseOffset,
                                        AMDGPU::SReg_32RegClass, *MRI))

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))

    TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::data0);

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  MachineBasicBlock *MBB = MI.getParent();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const {
  MachineFunction *MF = MI.getParent()->getParent();
  SIMachineFunctionInfo *MFInfo = MF->getInfo<SIMachineFunctionInfo>();

  TFE = TexFailCtrl & 0x1;

  LWE = TexFailCtrl & 0x2;

  return TexFailCtrl == 0;
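// Image intrinsics: work out dmask and the number of vdata/vaddr dwords,
// pick an MIMG encoding for the subtarget (NSA vs. contiguous vaddr, GFX10+,
// GFX11+, GFX90A, ...), then emit the final MIMG instruction.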
bool AMDGPUInstructionSelector::selectImageIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

    Register ResultDef = MI.getOperand(0).getReg();
    if (MRI->use_nodbg_empty(ResultDef))

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  Register VDataIn = AMDGPU::NoRegister;
  Register VDataOut = AMDGPU::NoRegister;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

    Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                      TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {

    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?

      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;

      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;

    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);

      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =

      IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;

    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())

    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);

    if (Subtarget->hasGFX90AInsts()) {
                                     NumVDataDwords, NumVAddrDwords);

                 << "requested image instruction is not supported on this GPU\n");

                                     NumVDataDwords, NumVAddrDwords);

                                     NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {

                   STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);

    MIB.addImm(IsA16 ? -1 : 0);

  if (!Subtarget->hasGFX90AInsts()) {

    MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();

  TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(

  MachineBasicBlock *MBB = MI.getParent();

  unsigned Offset = MI.getOperand(6).getImm();

  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(

  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_init_whole_wave:
    return selectInitWholeWave(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_load_to_lds:
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      F.getContext().diagnose(
          DiagnosticInfoUnsupported(F, "intrinsic not supported on subtarget",
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
  case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_signal_var:
    return selectNamedBarrierInit(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_get_named_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectSGetBarrierState(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {

  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);

  const MachineOperand &CCOp = I.getOperand(1);

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)

    if (!MRI->getRegClassOrNull(CCReg))
      MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));

            .add(I.getOperand(2))
            .add(I.getOperand(3));

    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));

  I.eraseFromParent();
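// G_TRUNC generally reduces to a subregister copy; the vector-to-v2s16 case
// instead packs the two halves with an SDWA mov or a shift/and/or sequence.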
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *DstRB;

    DstRB = RBI.getRegBank(DstReg, *MRI, TRI);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
  if (!SrcRC || !DstRC)

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {

  if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
    assert(STI.useRealTrue16Insts());

        .addReg(SrcReg, 0, AMDGPU::lo16);
    I.eraseFromParent();

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);

        .addReg(SrcReg, 0, AMDGPU::sub0);

        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {

      MachineInstr *MovSDWA =
          BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  unsigned SubRegIdx = DstSize < 32
                           ? static_cast<unsigned>(AMDGPU::sub0)
                           : TRI.getSubRegFromChannel(0, DstSize / 32);
  if (SubRegIdx == AMDGPU::NoSubRegister)

  const TargetRegisterClass *SrcWithSubRC =
      TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);

  if (SrcWithSubRC != SrcRC) {
    if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))

  I.getOperand(1).setSubReg(SubRegIdx);

  I.setDesc(TII.get(TargetOpcode::COPY));

  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
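// Sign/zero/any-extend selection: ANYEXT is a plain copy, VGPR sources use
// V_BFE, and SGPR sources use S_SEXT/S_BFE, assembling a 64-bit result via
// REG_SEQUENCE with an undef or sign-extended high half.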
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

  return &RBI.getRegBankFromRegClass(*RC, LLT());

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {

      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);

    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {

      MachineInstr *ExtI =

      I.eraseFromParent();

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =

    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {

      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);

      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);

    I.eraseFromParent();
    return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)

  assert(Mask.size() == 2);

  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  if (!Subtarget->hasSALUFloatInsts())

  Register Dst = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)

    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {

  const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  MachineBasicBlock *BB = MI.getParent();

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
      !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)

  MI.eraseFromParent();
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,

  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
    const MachineOperand &GEPOp = PtrMI->getOperand(i);
    const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.getReg());

      assert(GEPInfo.Imm == 0);

    const RegisterBank *OpBank = RBI.getRegBank(GEPOp.getReg(), MRI, TRI);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());

      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  const MachineMemOperand *MMO = *MI.memoperands_begin();

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

      STI.ldsRequiresM0Init()) {

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(

  if (Reg.isPhysical())

  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)

    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {

  MachineOperand &CondOp = I.getOperand(0);

  const TargetRegisterClass *ConstrainRC;

  if (!isVCC(CondReg, *MRI)) {

    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;

      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)

      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  if (IsVGPR)
    I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));

  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
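// Note (added commentary): selectG_PTRMASK below tries to avoid emitting the
// AND entirely for a half of the pointer whose mask bits are known to be all
// ones (CanCopyLow32 / CanCopyHi32), falling back to per-half V_AND/S_AND
// plus a REG_SEQUENCE for 64-bit pointers.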
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();

  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
  const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (DstRB != SrcRB) // Should only happen for hand written MIR.
    return false;

  // Try to avoid emitting a bit operation when we only need to touch half of
  // the 64-bit pointer.
  APInt MaskOnes = VT->getKnownOnes(MaskReg).zext(64);
  const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
  const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg)
        .setOperandDead(3); // Dead scc
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
    = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

  const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
  const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

  if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
      !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
      !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
    return false;

  if (Ty.getSizeInBits() == 32) {
    assert(MaskTy.getSizeInBits() == 32 &&
           "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    if (!IsVGPR)
      NewOp.setOperandDead(3); // Dead scc
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  Register MaskedLo, MaskedHi;

  if (CanCopyLow32) {
    // If all the bits in the low half are 1, we only need a copy for it.
    MaskedLo = LoReg;
  } else {
    // Extract the mask subregister and apply the and.
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        .addReg(LoReg)
        .addReg(MaskLo);
  }

  if (CanCopyHi32) {
    // If all the bits in the high half are 1, we only need a copy for it.
    MaskedHi = HiReg;
  } else {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        .addReg(HiReg)
        .addReg(MaskHi);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(MaskedLo)
      .addImm(AMDGPU::sub0)
      .addReg(MaskedHi)
      .addImm(AMDGPU::sub1);
  I.eraseFromParent();
3197static std::pair<Register, unsigned>
3204 std::tie(IdxBaseReg,
Offset) =
3206 if (IdxBaseReg == AMDGPU::NoRegister) {
3210 IdxBaseReg = IdxReg;
3217 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3218 return std::pair(IdxReg, SubRegs[0]);
3219 return std::pair(IdxBaseReg, SubRegs[
Offset]);
3222bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3228 LLT DstTy = MRI->getType(DstReg);
3229 LLT SrcTy = MRI->getType(SrcReg);
3231 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3232 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3233 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3237 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3240 const TargetRegisterClass *SrcRC =
3241 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3242 const TargetRegisterClass *DstRC =
3243 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3244 if (!SrcRC || !DstRC)
3246 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3247 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3248 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3251 MachineBasicBlock *BB =
MI.getParent();
3259 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3263 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3266 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3270 MI.eraseFromParent();
3277 if (!STI.useVGPRIndexMode()) {
3278 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3280 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3283 MI.eraseFromParent();
3287 const MCInstrDesc &GPRIDXDesc =
3288 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3294 MI.eraseFromParent();
3299bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3306 LLT VecTy = MRI->getType(DstReg);
3307 LLT ValTy = MRI->getType(ValReg);
3311 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3312 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3313 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3319 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3322 const TargetRegisterClass *VecRC =
3323 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3324 const TargetRegisterClass *ValRC =
3325 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3327 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3328 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3329 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3330 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3333 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3337 std::tie(IdxReg,
SubReg) =
3340 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3341 STI.useVGPRIndexMode();
3343 MachineBasicBlock *BB =
MI.getParent();
3347 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3350 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3351 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3356 MI.eraseFromParent();
3360 const MCInstrDesc &GPRIDXDesc =
3361 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3368 MI.eraseFromParent();
3372bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3373 if (!Subtarget->hasVMemToLDSLoad())
3376 unsigned Size =
MI.getOperand(3).getImm();
3379 const bool HasVIndex =
MI.getNumOperands() == 9;
3383 VIndex =
MI.getOperand(4).getReg();
3387 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3388 std::optional<ValueAndVReg> MaybeVOffset =
3390 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3396 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3397 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3398 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3399 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3402 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3403 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3404 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3405 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3408 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3409 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3410 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3411 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3414 if (!Subtarget->hasLDSLoadB96_B128())
3417 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3418 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3419 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3420 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3423 if (!Subtarget->hasLDSLoadB96_B128())
3426 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3427 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3428 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3429 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3433 MachineBasicBlock *
MBB =
MI.getParent();
3436 .
add(
MI.getOperand(2));
3440 if (HasVIndex && HasVOffset) {
3441 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3442 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3449 }
else if (HasVIndex) {
3451 }
else if (HasVOffset) {
3455 MIB.
add(
MI.getOperand(1));
3456 MIB.
add(
MI.getOperand(5 + OpOffset));
3457 MIB.
add(
MI.getOperand(6 + OpOffset));
3459 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3467 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3472 MachinePointerInfo StorePtrI = LoadPtrI;
3483 MachineMemOperand *StoreMMO =
3489 MI.eraseFromParent();
3501 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3507 return Def->getOperand(1).getReg();
3521 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3529 return Def->getOperand(1).getReg();
3531 if (
VT->signBitIsZero(
Reg))
3532 return matchZeroExtendFromS32(
Reg);
Register
AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(Register Reg) const {
  return MRI->getType(Reg) == LLT::scalar(32) ? Reg
                                              : matchZeroExtendFromS32(Reg);
}

Register
AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(Register Reg) const {
  return MRI->getType(Reg) == LLT::scalar(32) ? Reg
                                              : matchSignExtendFromS32(Reg);
}

Register
AMDGPUInstructionSelector::matchExtendFromS32OrS32(Register Reg,
                                                   bool IsSigned) const {
  if (IsSigned)
    return matchSignExtendFromS32OrS32(Reg);
  return matchZeroExtendFromS32OrS32(Reg);
}
3569 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3576 return Def->getOperand(1).getReg();
3581bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3582 if (!Subtarget->hasVMemToLDSLoad())
3586 unsigned Size =
MI.getOperand(3).getImm();
3592 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3595 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3598 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3601 if (!Subtarget->hasLDSLoadB96_B128())
3603 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3606 if (!Subtarget->hasLDSLoadB96_B128())
3608 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3612 MachineBasicBlock *
MBB =
MI.getParent();
3615 .
add(
MI.getOperand(2));
3621 if (!isSGPR(Addr)) {
3623 if (isSGPR(AddrDef->Reg)) {
3624 Addr = AddrDef->Reg;
3625 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3628 if (isSGPR(SAddr)) {
3629 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3630 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3641 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3653 MIB.
add(
MI.getOperand(4));
3655 unsigned Aux =
MI.getOperand(5).getImm();
3658 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3660 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3661 MachinePointerInfo StorePtrI = LoadPtrI;
3670 MachineMemOperand *StoreMMO =
3672 sizeof(int32_t),
Align(4));
3676 MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
    MachineInstr &MI) const {
  unsigned OpcodeOpIdx =
      MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
  MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm()));
  MI.removeOperand(OpcodeOpIdx);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
3692bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3695 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3696 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3698 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3699 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3701 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3702 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3704 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3705 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3707 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3708 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3710 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3711 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3713 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3714 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3716 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3717 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3719 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3720 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3722 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3723 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3725 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3726 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3728 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3729 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3731 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3732 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3734 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3735 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3737 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3738 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3740 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3741 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3743 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3744 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
3746 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
3747 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
3749 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
3750 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
3752 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
3753 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
3755 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
3756 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
3758 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
3759 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
3761 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
3762 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
3764 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
3765 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
3767 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
3768 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
3770 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
3771 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
3773 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
3774 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
3776 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
3777 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
3783 auto VDst_In =
MI.getOperand(4);
3785 MI.setDesc(TII.get(
Opc));
3786 MI.removeOperand(4);
3787 MI.removeOperand(1);
3788 MI.addOperand(VDst_In);
3789 MI.addImplicitDefUseOperands(*
MI.getParent()->getParent());
3793bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
3795 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
3796 !Subtarget->hasPermlane16Swap())
3798 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3799 !Subtarget->hasPermlane32Swap())
3802 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3803 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3804 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3806 MI.removeOperand(2);
3807 MI.setDesc(TII.get(Opcode));
3810 MachineOperand &FI =
MI.getOperand(4);
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .addReg(SrcReg);
  } else {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), DstReg)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .setOperandDead(3); // Dead scc
  }

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  MI.eraseFromParent();
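// Note (added commentary): the BitOp3_Op helper below encodes each boolean
// expression as an 8-bit truth table over three inputs. Each source gets its
// canonical table (src0 = 0xf0, src1 = 0xcc, src2 = 0xaa), and G_AND/G_OR/
// G_XOR combine the tables bitwise, e.g.
//   (a & b) | c -> (0xf0 & 0xcc) | 0xaa = 0xea,
// which becomes the immediate of the selected V_BITOP3 instruction.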
3849 unsigned NumOpcodes = 0;
3862 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
3873 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3887 if (Src.size() == 3) {
3894 for (
unsigned I = 0;
I < Src.size(); ++
I) {
3895 if (Src[
I] ==
LHS) {
3905 Bits = SrcBits[Src.size()];
3911 switch (
MI->getOpcode()) {
3912 case TargetOpcode::G_AND:
3913 case TargetOpcode::G_OR:
3914 case TargetOpcode::G_XOR: {
3919 if (!getOperandBits(
LHS, LHSBits) ||
3920 !getOperandBits(
RHS, RHSBits)) {
3922 return std::make_pair(0, 0);
3928 NumOpcodes +=
Op.first;
3929 LHSBits =
Op.second;
3934 NumOpcodes +=
Op.first;
3935 RHSBits =
Op.second;
3940 return std::make_pair(0, 0);
3944 switch (
MI->getOpcode()) {
3945 case TargetOpcode::G_AND:
3946 TTbl = LHSBits & RHSBits;
3948 case TargetOpcode::G_OR:
3949 TTbl = LHSBits | RHSBits;
3951 case TargetOpcode::G_XOR:
3952 TTbl = LHSBits ^ RHSBits;
3958 return std::make_pair(NumOpcodes + 1, TTbl);
3961bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
3962 if (!Subtarget->hasBitOp3Insts())
3966 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3967 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3973 unsigned NumOpcodes;
3975 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
3979 if (NumOpcodes < 2 || Src.empty())
3982 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
3983 if (NumOpcodes == 2 && IsB32) {
3991 }
else if (NumOpcodes < 4) {
3998 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
3999 if (!IsB32 && STI.hasTrue16BitInsts())
4000 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4001 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4002 unsigned CBL = STI.getConstantBusLimit(
Opc);
4003 MachineBasicBlock *
MBB =
MI.getParent();
4006 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4007 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4008 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4014 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4025 while (Src.size() < 3)
4026 Src.push_back(Src[0]);
4043 MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  Register SrcReg = MI.getOperand(0).getReg();
  if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  Register SP =
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
  Register WaveAddr = getWaveAddress(DefMI);
  MachineBasicBlock *MBB = MI.getParent();
  const DebugLoc &DL = MI.getDebugLoc();

  if (!WaveAddr) {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::S_LSHR_B32), WaveAddr)
        .addReg(SrcReg)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .setOperandDead(3); // Dead scc
  }

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), SP)
      .addReg(WaveAddr);

  MI.eraseFromParent();
4077 if (!
I.isPreISelOpcode()) {
4079 return selectCOPY(
I);
4083 switch (
I.getOpcode()) {
4084 case TargetOpcode::G_AND:
4085 case TargetOpcode::G_OR:
4086 case TargetOpcode::G_XOR:
4087 if (selectBITOP3(
I))
4091 return selectG_AND_OR_XOR(
I);
4092 case TargetOpcode::G_ADD:
4093 case TargetOpcode::G_SUB:
4094 case TargetOpcode::G_PTR_ADD:
4097 return selectG_ADD_SUB(
I);
4098 case TargetOpcode::G_UADDO:
4099 case TargetOpcode::G_USUBO:
4100 case TargetOpcode::G_UADDE:
4101 case TargetOpcode::G_USUBE:
4102 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4103 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4104 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4105 return selectG_AMDGPU_MAD_64_32(
I);
4106 case TargetOpcode::G_INTTOPTR:
4107 case TargetOpcode::G_BITCAST:
4108 case TargetOpcode::G_PTRTOINT:
4109 case TargetOpcode::G_FREEZE:
4110 return selectCOPY(
I);
4111 case TargetOpcode::G_FNEG:
4114 return selectG_FNEG(
I);
4115 case TargetOpcode::G_FABS:
4118 return selectG_FABS(
I);
4119 case TargetOpcode::G_EXTRACT:
4120 return selectG_EXTRACT(
I);
4121 case TargetOpcode::G_MERGE_VALUES:
4122 case TargetOpcode::G_CONCAT_VECTORS:
4123 return selectG_MERGE_VALUES(
I);
4124 case TargetOpcode::G_UNMERGE_VALUES:
4125 return selectG_UNMERGE_VALUES(
I);
4126 case TargetOpcode::G_BUILD_VECTOR:
4127 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4128 return selectG_BUILD_VECTOR(
I);
4129 case TargetOpcode::G_IMPLICIT_DEF:
4130 return selectG_IMPLICIT_DEF(
I);
4131 case TargetOpcode::G_INSERT:
4132 return selectG_INSERT(
I);
4133 case TargetOpcode::G_INTRINSIC:
4134 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4135 return selectG_INTRINSIC(
I);
4136 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4137 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4138 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4139 case TargetOpcode::G_ICMP:
4140 case TargetOpcode::G_FCMP:
4141 if (selectG_ICMP_or_FCMP(
I))
4144 case TargetOpcode::G_LOAD:
4145 case TargetOpcode::G_ZEXTLOAD:
4146 case TargetOpcode::G_SEXTLOAD:
4147 case TargetOpcode::G_STORE:
4148 case TargetOpcode::G_ATOMIC_CMPXCHG:
4149 case TargetOpcode::G_ATOMICRMW_XCHG:
4150 case TargetOpcode::G_ATOMICRMW_ADD:
4151 case TargetOpcode::G_ATOMICRMW_SUB:
4152 case TargetOpcode::G_ATOMICRMW_AND:
4153 case TargetOpcode::G_ATOMICRMW_OR:
4154 case TargetOpcode::G_ATOMICRMW_XOR:
4155 case TargetOpcode::G_ATOMICRMW_MIN:
4156 case TargetOpcode::G_ATOMICRMW_MAX:
4157 case TargetOpcode::G_ATOMICRMW_UMIN:
4158 case TargetOpcode::G_ATOMICRMW_UMAX:
4159 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4160 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4161 case TargetOpcode::G_ATOMICRMW_FADD:
4162 case TargetOpcode::G_ATOMICRMW_FMIN:
4163 case TargetOpcode::G_ATOMICRMW_FMAX:
4164 return selectG_LOAD_STORE_ATOMICRMW(
I);
4165 case TargetOpcode::G_SELECT:
4166 return selectG_SELECT(
I);
4167 case TargetOpcode::G_TRUNC:
4168 return selectG_TRUNC(
I);
4169 case TargetOpcode::G_SEXT:
4170 case TargetOpcode::G_ZEXT:
4171 case TargetOpcode::G_ANYEXT:
4172 case TargetOpcode::G_SEXT_INREG:
4176 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4179 return selectG_SZA_EXT(
I);
4180 case TargetOpcode::G_FPEXT:
4181 if (selectG_FPEXT(
I))
4184 case TargetOpcode::G_BRCOND:
4185 return selectG_BRCOND(
I);
4186 case TargetOpcode::G_GLOBAL_VALUE:
4187 return selectG_GLOBAL_VALUE(
I);
4188 case TargetOpcode::G_PTRMASK:
4189 return selectG_PTRMASK(
I);
4190 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4191 return selectG_EXTRACT_VECTOR_ELT(
I);
4192 case TargetOpcode::G_INSERT_VECTOR_ELT:
4193 return selectG_INSERT_VECTOR_ELT(
I);
4194 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4195 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4196 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4197 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4198 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4201 assert(Intr &&
"not an image intrinsic with image pseudo");
4202 return selectImageIntrinsic(
I, Intr);
4204 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4205 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4206 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4207 return selectBVHIntersectRayIntrinsic(
I);
4208 case AMDGPU::G_SBFX:
4209 case AMDGPU::G_UBFX:
4210 return selectG_SBFX_UBFX(
I);
4211 case AMDGPU::G_SI_CALL:
4212 I.setDesc(TII.get(AMDGPU::SI_CALL));
4214 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4215 return selectWaveAddress(
I);
4216 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4217 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4220 case AMDGPU::G_STACKRESTORE:
4221 return selectStackRestore(
I);
4223 return selectPHI(
I);
4224 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4225 return selectCOPY_SCC_VCC(
I);
4226 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4227 return selectCOPY_VCC_SCC(
I);
4228 case AMDGPU::G_AMDGPU_READANYLANE:
4229 return selectReadAnyLane(
I);
4230 case TargetOpcode::G_CONSTANT:
4231 case TargetOpcode::G_FCONSTANT:
4239AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4246std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4247 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4251 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4252 Src =
MI->getOperand(1).getReg();
4255 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4260 if (
LHS &&
LHS->isZero()) {
4262 Src =
MI->getOperand(2).getReg();
4266 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4267 Src =
MI->getOperand(1).getReg();
4274 return std::pair(Src, Mods);
4277Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4279 bool ForceVGPR)
const {
4280 if ((Mods != 0 || ForceVGPR) &&
4281 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4288 TII.
get(AMDGPU::COPY), VGPRSrc)
4300AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4302 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4307AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4310 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4313 [=](MachineInstrBuilder &MIB) {
4314 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4316 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4317 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4318 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4323AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4326 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4331 [=](MachineInstrBuilder &MIB) {
4332 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4334 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4335 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4336 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4341AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4343 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4344 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4345 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4350AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4353 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4356 [=](MachineInstrBuilder &MIB) {
4357 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4359 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4364AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4368 std::tie(Src, Mods) =
4369 selectVOP3ModsImpl(Root.
getReg(),
false);
4372 [=](MachineInstrBuilder &MIB) {
4373 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4375 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4380AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4383 std::tie(Src, Mods) =
4384 selectVOP3ModsImpl(Root.
getReg(),
true,
4388 [=](MachineInstrBuilder &MIB) {
4389 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4391 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4396AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4399 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4402 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4427 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4430 unsigned DstSize =
MRI.getType(
MI->getOperand(0).getReg()).getSizeInBits();
4431 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4432 return DstSize * 2 == SrcSize;
4438 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4442 std::optional<ValueAndVReg> ShiftAmt;
4445 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4446 unsigned Shift = ShiftAmt->Value.getZExtValue();
4447 return Shift * 2 == SrcSize;
4455 if (
MI->getOpcode() != AMDGPU::G_SHL)
4459 std::optional<ValueAndVReg> ShiftAmt;
4462 unsigned SrcSize =
MRI.getType(
MI->getOperand(1).getReg()).getSizeInBits();
4463 unsigned Shift = ShiftAmt->Value.getZExtValue();
4464 return Shift * 2 == SrcSize;
4472 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4474 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4475 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
4645static std::optional<std::pair<Register, SrcStatus>>
4650 unsigned Opc =
MI->getOpcode();
4654 case AMDGPU::G_BITCAST:
4655 return std::optional<std::pair<Register, SrcStatus>>(
4656 {
MI->getOperand(1).getReg(), Curr.second});
4658 if (
MI->getOperand(1).getReg().isPhysical())
4659 return std::nullopt;
4660 return std::optional<std::pair<Register, SrcStatus>>(
4661 {
MI->getOperand(1).getReg(), Curr.second});
4662 case AMDGPU::G_FNEG: {
4665 return std::nullopt;
4666 return std::optional<std::pair<Register, SrcStatus>>(
4667 {
MI->getOperand(1).getReg(), Stat});
4674 switch (Curr.second) {
4677 return std::optional<std::pair<Register, SrcStatus>>(
4680 if (Curr.first ==
MI->getOperand(0).getReg())
4681 return std::optional<std::pair<Register, SrcStatus>>(
4683 return std::optional<std::pair<Register, SrcStatus>>(
4695 return std::optional<std::pair<Register, SrcStatus>>(
4699 if (Curr.first ==
MI->getOperand(0).getReg())
4700 return std::optional<std::pair<Register, SrcStatus>>(
4702 return std::optional<std::pair<Register, SrcStatus>>(
4708 return std::optional<std::pair<Register, SrcStatus>>(
4713 return std::optional<std::pair<Register, SrcStatus>>(
4718 return std::optional<std::pair<Register, SrcStatus>>(
4723 return std::optional<std::pair<Register, SrcStatus>>(
4729 return std::nullopt;
4739 bool HasNeg =
false;
4741 bool HasOpsel =
true;
4746 unsigned Opc =
MI->getOpcode();
4748 if (
Opc < TargetOpcode::GENERIC_OP_END) {
4751 }
else if (
Opc == TargetOpcode::G_INTRINSIC) {
4754 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
4778 while (
Depth <= MaxDepth && Curr.has_value()) {
4781 Statlist.push_back(Curr.value());
4788static std::pair<Register, SrcStatus>
4795 while (
Depth <= MaxDepth && Curr.has_value()) {
4801 LastSameOrNeg = Curr.value();
4806 return LastSameOrNeg;
4811 unsigned Width1 =
MRI.getType(Reg1).getSizeInBits();
4812 unsigned Width2 =
MRI.getType(Reg2).getSizeInBits();
4813 return Width1 == Width2;
4849 IsHalfState(HiStat);
4852std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
4858 return {RootReg, Mods};
4861 SearchOptions SO(RootReg, MRI);
4872 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
4874 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
4875 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
4877 return {Stat.first, Mods};
4883 if (StatlistHi.
empty()) {
4885 return {Stat.first, Mods};
4891 if (StatlistLo.
empty()) {
4893 return {Stat.first, Mods};
4896 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
4897 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
4898 if (StatlistHi[
I].first == StatlistLo[J].first &&
4900 StatlistHi[
I].first, RootReg, TII, MRI))
4901 return {StatlistHi[
I].first,
4902 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
4908 return {Stat.first, Mods};
4918 return RB->
getID() == RBNo;
4935 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI,
MRI,
TRI) ||
4940 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
4946 Register DstReg =
MRI.cloneVirtualRegister(RootReg);
4949 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
4957AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
4962 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
4966 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4967 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4972AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
4974 return selectVOP3PRetHelper(Root);
4978AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
4980 return selectVOP3PRetHelper(Root,
true);
4984AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
4987 "expected i1 value");
4993 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5001 switch (Elts.
size()) {
5003 DstRegClass = &AMDGPU::VReg_256RegClass;
5006 DstRegClass = &AMDGPU::VReg_128RegClass;
5009 DstRegClass = &AMDGPU::VReg_64RegClass;
5016 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5017 .addDef(
MRI.createVirtualRegister(DstRegClass));
5018 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5029 if (ModOpcode == TargetOpcode::G_FNEG) {
5033 for (
auto El : Elts) {
5039 if (Elts.size() != NegAbsElts.
size()) {
5048 assert(ModOpcode == TargetOpcode::G_FABS);
5056AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5062 assert(BV->getNumSources() > 0);
5064 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5065 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5068 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5069 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5076 if (BV->getNumSources() == EltsF32.
size()) {
5082 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5083 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5087AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5093 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5101 if (CV->getNumSources() == EltsV2F16.
size()) {
5108 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5109 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5113AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5119 assert(CV->getNumSources() > 0);
5120 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5122 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5126 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5127 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5134 if (CV->getNumSources() == EltsV2F16.
size()) {
5141 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5142 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5146AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5147 std::optional<FPValueAndVReg> FPValReg;
5149 if (TII.isInlineConstant(FPValReg->Value)) {
5150 return {{[=](MachineInstrBuilder &MIB) {
5151 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5161 if (TII.isInlineConstant(ICst)) {
5171AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5177 std::optional<ValueAndVReg> ShiftAmt;
5179 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5180 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5181 Key = ShiftAmt->Value.getZExtValue() / 8;
5186 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5187 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5192AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5199 std::optional<ValueAndVReg> ShiftAmt;
5201 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5202 ShiftAmt->Value.getZExtValue() == 16) {
5208 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5209 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5214AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5221 S32 = matchAnyExtendFromS32(Src);
5225 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5230 Src =
Def->getOperand(2).getReg();
5237 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5238 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5243AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5246 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5250 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5251 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5257AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5260 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5266 [=](MachineInstrBuilder &MIB) {
5268 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5270 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5275AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5278 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5284 [=](MachineInstrBuilder &MIB) {
5286 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5288 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5295bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5297 bool IsSigned)
const {
5298 if (!Subtarget->hasScaleOffset())
5302 MachineMemOperand *MMO = *
MI.memoperands_begin();
5314 OffsetReg =
Def->Reg;
5329 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5333 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5334 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5335 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5336 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5349bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5353 bool *ScaleOffset)
const {
5355 MachineBasicBlock *
MBB =
MI->getParent();
5360 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5362 if (AddrInfo.
empty())
5365 const GEPInfo &GEPI = AddrInfo[0];
5366 std::optional<int64_t> EncodedImm;
5369 *ScaleOffset =
false;
5374 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5375 AddrInfo.
size() > 1) {
5376 const GEPInfo &GEPI2 = AddrInfo[1];
5377 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5378 Register OffsetReg = GEPI2.SgprParts[1];
5381 selectScaleOffset(Root, OffsetReg,
false );
5382 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5384 Base = GEPI2.SgprParts[0];
5385 *SOffset = OffsetReg;
5394 auto SKnown =
VT->getKnownBits(*SOffset);
5395 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5407 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5408 Base = GEPI.SgprParts[0];
5414 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5420 Base = GEPI.SgprParts[0];
5421 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5422 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5427 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5428 Register OffsetReg = GEPI.SgprParts[1];
5430 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5431 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5433 Base = GEPI.SgprParts[0];
5434 *SOffset = OffsetReg;
5443AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5446 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5448 return std::nullopt;
5450 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5451 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5455AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5457 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5459 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5460 return std::nullopt;
5462 const GEPInfo &GEPInfo = AddrInfo[0];
5463 Register PtrReg = GEPInfo.SgprParts[0];
5464 std::optional<int64_t> EncodedImm =
5467 return std::nullopt;
5470 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5471 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5476AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5479 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5481 return std::nullopt;
5484 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5485 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5486 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5490AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5494 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5495 return std::nullopt;
5498 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5499 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5501 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5504std::pair<Register, int>
5505AMDGPUInstructionSelector::selectFlatOffsetImpl(
MachineOperand &Root,
5506 uint64_t FlatVariant)
const {
5511 if (!STI.hasFlatInstOffsets())
5515 int64_t ConstOffset;
5517 std::tie(PtrBase, ConstOffset, IsInBounds) =
5518 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5524 if (ConstOffset == 0 ||
5526 !isFlatScratchBaseLegal(Root.
getReg())) ||
5530 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5531 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5534 return std::pair(PtrBase, ConstOffset);
5538AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5542 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5543 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5548AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5552 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5553 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5558AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5562 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5563 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5569AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
5571 bool NeedIOffset)
const {
5574 int64_t ConstOffset;
5575 int64_t ImmOffset = 0;
5579 std::tie(PtrBase, ConstOffset, std::ignore) =
5580 getPtrBaseWithConstantOffset(Addr, *MRI);
5582 if (ConstOffset != 0) {
5587 ImmOffset = ConstOffset;
5590 if (isSGPR(PtrBaseDef->Reg)) {
5591 if (ConstOffset > 0) {
5597 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
5599 std::tie(SplitImmOffset, RemainderOffset) =
5604 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
5607 MachineBasicBlock *
MBB =
MI->getParent();
5609 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5611 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5613 .
addImm(RemainderOffset);
5617 [=](MachineInstrBuilder &MIB) {
5620 [=](MachineInstrBuilder &MIB) {
5623 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
5624 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5627 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
5628 [=](MachineInstrBuilder &MIB) {
5631 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
5641 unsigned NumLiterals =
5642 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
5643 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
5644 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
5645 return std::nullopt;
5652 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5657 if (isSGPR(SAddr)) {
5658 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
5662 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
5663 Subtarget->hasSignedGVSOffset());
5664 if (
Register VOffset = matchExtendFromS32OrS32(
5665 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
5667 return {{[=](MachineInstrBuilder &MIB) {
5670 [=](MachineInstrBuilder &MIB) {
5673 [=](MachineInstrBuilder &MIB) {
5676 [=](MachineInstrBuilder &MIB) {
5680 return {{[=](MachineInstrBuilder &MIB) {
5683 [=](MachineInstrBuilder &MIB) {
5686 [=](MachineInstrBuilder &MIB) {
5696 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
5697 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
5698 return std::nullopt;
5703 MachineBasicBlock *
MBB =
MI->getParent();
5704 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5706 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
5711 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5712 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5713 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5714 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5717 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
5718 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
5719 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
5724AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
5725 return selectGlobalSAddr(Root, 0);
5729AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
5735 return selectGlobalSAddr(Root, PassedCPol);
5739AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
5745 return selectGlobalSAddr(Root, PassedCPol);
5749AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
5754AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
5761 return selectGlobalSAddr(Root, PassedCPol,
false);
5765AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
5772 return selectGlobalSAddr(Root, PassedCPol,
false);
5776AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
5779 int64_t ConstOffset;
5780 int64_t ImmOffset = 0;
5784 std::tie(PtrBase, ConstOffset, std::ignore) =
5785 getPtrBaseWithConstantOffset(Addr, *MRI);
5787 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
5791 ImmOffset = ConstOffset;
5795 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5796 int FI = AddrDef->MI->getOperand(1).
getIndex();
5799 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5805 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
5806 Register LHS = AddrDef->MI->getOperand(1).getReg();
5807 Register RHS = AddrDef->MI->getOperand(2).getReg();
5811 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
5812 isSGPR(RHSDef->Reg)) {
5813 int FI = LHSDef->MI->getOperand(1).getIndex();
5817 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5819 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
5827 return std::nullopt;
5830 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
5831 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
5836bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
5838 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
5844 auto VKnown =
VT->getKnownBits(VAddr);
5847 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
5848 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
5849 return (VMax & 3) + (
SMax & 3) >= 4;
5853AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
5856 int64_t ConstOffset;
5857 int64_t ImmOffset = 0;
5861 std::tie(PtrBase, ConstOffset, std::ignore) =
5862 getPtrBaseWithConstantOffset(Addr, *MRI);
5865 if (ConstOffset != 0 &&
5869 ImmOffset = ConstOffset;
5873 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
5874 return std::nullopt;
5876 Register RHS = AddrDef->MI->getOperand(2).getReg();
5877 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
5878 return std::nullopt;
5880 Register LHS = AddrDef->MI->getOperand(1).getReg();
5883 if (OrigAddr != Addr) {
5884 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
5885 return std::nullopt;
5887 if (!isFlatScratchBaseLegalSV(OrigAddr))
5888 return std::nullopt;
5891 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
5892 return std::nullopt;
5894 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
5898 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
5899 int FI = LHSDef->MI->getOperand(1).getIndex();
5901 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5903 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5904 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5913 return std::nullopt;
5916 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
5917 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
5918 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
5919 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
5924AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
5926 MachineBasicBlock *
MBB =
MI->getParent();
5928 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
5933 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5938 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
5942 return {{[=](MachineInstrBuilder &MIB) {
5945 [=](MachineInstrBuilder &MIB) {
5948 [=](MachineInstrBuilder &MIB) {
5953 [=](MachineInstrBuilder &MIB) {
5962 std::optional<int> FI;
5965 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
5967 int64_t ConstOffset;
5968 std::tie(PtrBase, ConstOffset, std::ignore) =
5969 getPtrBaseWithConstantOffset(VAddr, *MRI);
5970 if (ConstOffset != 0) {
5971 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
5972 (!STI.privateMemoryResourceIsRangeChecked() ||
5973 VT->signBitIsZero(PtrBase))) {
5974 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
5975 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
5981 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
5985 return {{[=](MachineInstrBuilder &MIB) {
5988 [=](MachineInstrBuilder &MIB) {
5994 [=](MachineInstrBuilder &MIB) {
5999 [=](MachineInstrBuilder &MIB) {
6004bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6009 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6014 return VT->signBitIsZero(
Base);
6017bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6019 unsigned Size)
const {
6020 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6025 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6030 return VT->signBitIsZero(
Base);
6035 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6036 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6043bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6051 if (STI.hasSignedScratchOffsets())
6057 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6058 std::optional<ValueAndVReg> RhsValReg =
6064 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6065 RhsValReg->Value.getSExtValue() > -0x40000000)
6069 return VT->signBitIsZero(
LHS);
6074bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6082 if (STI.hasSignedScratchOffsets())
6087 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6092bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6096 if (STI.hasSignedScratchOffsets())
6101 std::optional<DefinitionAndSourceRegister> BaseDef =
6103 std::optional<ValueAndVReg> RHSOffset =
6113 (RHSOffset->Value.getSExtValue() < 0 &&
6114 RHSOffset->Value.getSExtValue() > -0x40000000)))
6117 Register LHS = BaseDef->MI->getOperand(1).getReg();
6118 Register RHS = BaseDef->MI->getOperand(2).getReg();
6119 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6122bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6123 unsigned ShAmtBits)
const {
6124 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6126 std::optional<APInt>
RHS =
6131 if (
RHS->countr_one() >= ShAmtBits)
6134 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6135 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6139AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6142 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6144 std::optional<DefinitionAndSourceRegister>
Def =
6146 assert(Def &&
"this shouldn't be an optional result");
6151 [=](MachineInstrBuilder &MIB) {
6154 [=](MachineInstrBuilder &MIB) {
6157 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6168 if (!TII.isLegalMUBUFImmOffset(
Offset))
6176 [=](MachineInstrBuilder &MIB) {
6179 [=](MachineInstrBuilder &MIB) {
6187 !TII.isLegalMUBUFImmOffset(
Offset))
6191 [=](MachineInstrBuilder &MIB) {
6194 [=](MachineInstrBuilder &MIB) {
6201std::pair<Register, unsigned>
6202AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6203 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6204 int64_t ConstAddr = 0;
6208 std::tie(PtrBase,
Offset, std::ignore) =
6209 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6212 if (isDSOffsetLegal(PtrBase,
Offset)) {
6214 return std::pair(PtrBase,
Offset);
6216 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6225 return std::pair(Root.
getReg(), 0);
6229AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6232 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6234 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6240AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6241 return selectDSReadWrite2(Root, 4);
6245AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6246 return selectDSReadWrite2(Root, 8);
6250AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6251 unsigned Size)
const {
6256 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6258 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6262std::pair<Register, unsigned>
6263AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6264 unsigned Size)
const {
6265 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6266 int64_t ConstAddr = 0;
6270 std::tie(PtrBase,
Offset, std::ignore) =
6271 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6274 int64_t OffsetValue0 =
Offset;
6276 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6278 return std::pair(PtrBase, OffsetValue0 /
Size);
6280 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6288 return std::pair(Root.
getReg(), 0);
6296std::tuple<Register, int64_t, bool>
6297AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6300 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6301 return {Root, 0,
false};
6304 std::optional<ValueAndVReg> MaybeOffset =
6307 return {Root, 0,
false};
6322 Register RSrc2 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6323 Register RSrc3 =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
6324 Register RSrcHi =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6325 Register RSrc =
MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
6327 B.buildInstr(AMDGPU::S_MOV_B32)
6330 B.buildInstr(AMDGPU::S_MOV_B32)
6337 B.buildInstr(AMDGPU::REG_SEQUENCE)
6340 .addImm(AMDGPU::sub0)
6342 .addImm(AMDGPU::sub1);
6346 RSrcLo =
MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
6347 B.buildInstr(AMDGPU::S_MOV_B64)
6352 B.buildInstr(AMDGPU::REG_SEQUENCE)
6355 .addImm(AMDGPU::sub0_sub1)
6357 .addImm(AMDGPU::sub2_sub3);
6364 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6373 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6380AMDGPUInstructionSelector::MUBUFAddressData
6381AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6382 MUBUFAddressData
Data;
6388 std::tie(PtrBase,
Offset, std::ignore) =
6389 getPtrBaseWithConstantOffset(Src, *MRI);
6395 if (MachineInstr *InputAdd
6397 Data.N2 = InputAdd->getOperand(1).getReg();
6398 Data.N3 = InputAdd->getOperand(2).getReg();
6413bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6419 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6420 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6426void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6428 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6432 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6433 B.buildInstr(AMDGPU::S_MOV_B32)
6439bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6444 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6447 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6448 if (!shouldUseAddr64(AddrData))
6454 Offset = AddrData.Offset;
6460 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6462 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6475 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6486 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6490bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6495 if (STI.useFlatForGlobal())
6498 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6499 if (shouldUseAddr64(AddrData))
6505 Offset = AddrData.Offset;
6511 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6516AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6522 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6528 [=](MachineInstrBuilder &MIB) {
6531 [=](MachineInstrBuilder &MIB) {
6534 [=](MachineInstrBuilder &MIB) {
6537 else if (STI.hasRestrictedSOffset())
6538 MIB.
addReg(AMDGPU::SGPR_NULL);
6542 [=](MachineInstrBuilder &MIB) {
6552AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6557 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6561 [=](MachineInstrBuilder &MIB) {
6564 [=](MachineInstrBuilder &MIB) {
6567 else if (STI.hasRestrictedSOffset())
6568 MIB.
addReg(AMDGPU::SGPR_NULL);
6580AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
6585 SOffset = AMDGPU::SGPR_NULL;
6587 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
6591static std::optional<uint64_t>
6595 if (!OffsetVal || !
isInt<32>(*OffsetVal))
6596 return std::nullopt;
6597 return Lo_32(*OffsetVal);
6601AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
6602 std::optional<uint64_t> OffsetVal =
6607 std::optional<int64_t> EncodedImm =
6612 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6616AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
6623 std::optional<int64_t> EncodedImm =
6628 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
6632AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
6640 return std::nullopt;
6642 std::optional<int64_t> EncodedOffset =
6645 return std::nullopt;
6648 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
6649 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
6652std::pair<Register, unsigned>
6653AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
6654 bool &Matched)
const {
6659 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
6669 const auto CheckAbsNeg = [&]() {
6674 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
6705AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
6710 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6715    [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6716    [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6721AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
6725 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
6728    [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
6729    [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }
6733bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
6737  Register CCReg = I.getOperand(0).getReg();
6742  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
6743      .addImm(I.getOperand(2).getImm());
6747 I.eraseFromParent();
6748 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
6752bool AMDGPUInstructionSelector::selectSGetBarrierState(
6756  MachineOperand BarOp = I.getOperand(2);
6757 std::optional<int64_t> BarValImm =
6761  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6765 MachineInstrBuilder MIB;
6766 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
6767 : AMDGPU::S_GET_BARRIER_STATE_M0;
6770  auto DstReg = I.getOperand(0).getReg();
6771  const TargetRegisterClass *DstRC =
6772      TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6773 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6779 I.eraseFromParent();
6784 if (HasInlineConst) {
6788 case Intrinsic::amdgcn_s_barrier_join:
6789 return AMDGPU::S_BARRIER_JOIN_IMM;
6790 case Intrinsic::amdgcn_s_get_named_barrier_state:
6791 return AMDGPU::S_GET_BARRIER_STATE_IMM;
6797 case Intrinsic::amdgcn_s_barrier_join:
6798 return AMDGPU::S_BARRIER_JOIN_M0;
6799 case Intrinsic::amdgcn_s_get_named_barrier_state:
6800 return AMDGPU::S_GET_BARRIER_STATE_M0;
6805bool AMDGPUInstructionSelector::selectNamedBarrierInit(
6809  MachineOperand BarOp = I.getOperand(1);
6810  MachineOperand CntOp = I.getOperand(2);
6813 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6819 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6826 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6832 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6833 constexpr unsigned ShAmt = 16;
6839 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6849 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
6850 ? AMDGPU::S_BARRIER_INIT_M0
6851 : AMDGPU::S_BARRIER_SIGNAL_M0;
6852 MachineInstrBuilder MIB;
6855 I.eraseFromParent();
6859bool AMDGPUInstructionSelector::selectNamedBarrierInst(
6863 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
6866 std::optional<int64_t> BarValImm =
6871 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6877 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6883  auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
6888 MachineInstrBuilder MIB;
6892 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
6893    auto DstReg = I.getOperand(0).getReg();
6894    const TargetRegisterClass *DstRC =
6895        TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI);
6896 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
6902 auto BarId = ((*BarValImm) >> 4) & 0x3F;
6906 I.eraseFromParent();
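// --- Illustrative sketch (not part of AMDGPUInstructionSelector.cpp) ---
// The BarId computation above recovers the named-barrier id from the constant
// operand: a 6-bit field starting at bit 4, hence the >> 4 and & 0x3F.
#include <cassert>
#include <cstdint>

static unsigned extractBarrierId(int64_t BarVal) {
  return (static_cast<uint64_t>(BarVal) >> 4) & 0x3F;
}

int main() {
  assert(extractBarrierId(0x3F0) == 0x3F); // all six id bits set
  assert(extractBarrierId(0x025) == 0x02); // bits below bit 4 are ignored
}
// --- end sketch ---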
6913  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6914         "Expected G_CONSTANT");
6915  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
6921  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6922         "Expected G_CONSTANT");
6923  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
6929  const MachineOperand &Op = MI.getOperand(1);
6930  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1);
6931  MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
6937  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6938         "Expected G_CONSTANT");
6939  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
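// --- Illustrative sketch (not part of AMDGPUInstructionSelector.cpp) ---
// The G_FCONSTANT renderer above emits the raw bit pattern of the FP value as
// an immediate (bitcastToAPInt), not its numeric value; the same idea in
// standalone form:
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t fpBitsAsImm(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // bit-for-bit reinterpretation
  return Bits;
}

int main() {
  std::printf("0x%08x\n", fpBitsAsImm(1.0f)); // prints 0x3f800000
}
// --- end sketch ---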
6947  const MachineOperand &Op = MI.getOperand(OpIdx);
6964  assert(OpIdx >= 0 && "expected to match an immediate operand");
6968void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
6970  assert(OpIdx >= 0 && "expected to match an immediate operand");
6975void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
6977  assert(OpIdx >= 0 && "expected to match an immediate operand");
6983void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
6985  assert(OpIdx >= 0 && "expected to match an immediate operand");
6990void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
6992  assert(OpIdx >= 0 && "expected to match an immediate operand");
6998void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7000  assert(OpIdx >= 0 && "expected to match an immediate operand");
7005void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7007  assert(OpIdx >= 0 && "expected to match an immediate operand");
7012void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7014  assert(OpIdx >= 0 && "expected to match an immediate operand");
7019void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7021  assert(OpIdx >= 0 && "expected to match an immediate operand");
7030  assert(OpIdx >= 0 && "expected to match an immediate operand");
7039  assert(OpIdx >= 0 && "expected to match an immediate operand");
7046void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7048  assert(OpIdx >= 0 && "expected to match an immediate operand");
7049  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
7064  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
7066 assert(ExpVal != INT_MIN);
7084  if (MI.getOperand(OpIdx).getImm())
7086  MIB.addImm((int64_t)Mods);
7093  if (MI.getOperand(OpIdx).getImm())
7095  MIB.addImm((int64_t)Mods);
7101  unsigned Val = MI.getOperand(OpIdx).getImm();
7109  MIB.addImm((int64_t)Mods);
7115  uint32_t V = MI.getOperand(2).getImm();
7118 if (!Subtarget->hasSafeCUPrefetch())
7124void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7126  unsigned Val = MI.getOperand(OpIdx).getImm();
7135bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
7136 return TII.isInlineConstant(Imm);
7139bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
7140 return TII.isInlineConstant(Imm);
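// --- Illustrative sketch (not part of AMDGPUInstructionSelector.cpp) ---
// isInlineImmediate defers to TII.isInlineConstant, which answers whether a
// value can be encoded directly in the instruction without a literal dword.
// The -16..64 integer range below is the commonly documented rule for AMDGPU
// inline integers and is stated here as background, not read from the listing.
#include <cassert>
#include <cstdint>

static bool isInlineInt(int64_t V) { return V >= -16 && V <= 64; }

int main() {
  assert(isInlineInt(64));  // encodable inline
  assert(!isInlineInt(65)); // would need a separate literal operand
}
// --- end sketch ---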