#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
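// The TableGen-generated pattern matcher is pulled in above via
// GET_GLOBALISEL_IMPL; the hand-written select* routines below cover the
// cases the generated matcher does not handle.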
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"

  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()

bool AMDGPUInstructionSelector::isVCC(Register Reg,
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
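// isVCC: the register holds a wave-wide 1-bit condition (VCC register bank)
// rather than a scalar/vector boolean value.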
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)

bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        And.setOperandDead(3);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
  if (MO.getReg().isPhysical())
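// Copies into a VCC-bank destination cannot remain plain COPYs: the 32-bit
// source is masked (S_AND_B32 / V_AND_B32) and then compared against zero with
// V_CMP_NE_U32_e64 to produce the wave-wide condition mask.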
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));

                                         unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);

    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;

bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&

bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
              .add(I.getOperand(1))
              .add(I.getOperand(2))
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
            .add(I.getOperand(1))
            .add(I.getOperand(2))
  assert(!Sub && "illegal sub should not reach here");
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
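// 64-bit G_ADD/G_SUB is decomposed into 32-bit halves: the sub0 halves are
// added with S_ADD_U32 (or V_ADD_CO_U32 producing a carry), the sub1 halves
// with the carry-in (S_ADDC_U32), and the result is rebuilt with REG_SEQUENCE.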
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;
  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
  if (!MRI->getRegClassOrNull(Dst1Reg))
    MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
                                    AMDGPU::SReg_32RegClass, *MRI))
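// Divergent overflow results use the VALU carry forms (V_ADD_CO/V_ADDC with a
// VCC carry operand); uniform ones use S_ADD/S_ADDC, with the carry-in and
// carry-out routed through the SCC physical register via explicit copies.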
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);

bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                       *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)

bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    MI.eraseFromParent();
    MI.eraseFromParent();
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
      MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  MI.setDesc(TII.get(Opc));
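// Scalar v2i16 build vectors map onto the S_PACK family: S_PACK_LL takes both
// low halves, while the LH/HL/HH variants are chosen when one or both inputs
// are already the high 16 bits of a 32-bit value (fed by a shift-right by 16).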
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));

bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);
  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();
  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)

bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &
    std::optional<ValueAndVReg> ConstVal =
      MIB.addImm(ConstVal->Value.getSExtValue());
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();
    I.eraseFromParent();
    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())
  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
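// Divergent compares: pick the V_CMP opcode by operand size (16/32/64 bit) and
// condition code, preferring the _t16 encodings when the subtarget has true
// 16-bit instructions.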
                                              unsigned Size) const {
    return AMDGPU::S_CMP_LG_U64;
    return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
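// Uniform compares: the scalar S_CMP forms write their result to SCC, which is
// then copied into the boolean destination register by the caller.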
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();
  if (I.getOpcode() == AMDGPU::G_FCMP)
              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);
  Register SrcReg = I.getOperand(2).getReg();
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS.getReg());
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS.getReg());
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  std::optional<ValueAndVReg> Arg =
  const auto BuildCopy = [&](Register SrcReg) {
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
    const int64_t Value = Arg->Value.getSExtValue();
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());
  I.eraseFromParent();
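// amdgcn.ballot of a constant-true condition is just a copy of EXEC (EXEC_LO
// in wave32); a constant condition otherwise is materialized directly with
// S_MOV_B32/B64, and anything else copies the already-computed lane mask.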
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                                             AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));
  MI.eraseFromParent();
  if (!MRI->getRegClassOrNull(Reg))

bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;
  if (WaveDone && !WaveRelease)
  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);
    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
    Offset1 |= (CountDw - 1) << 6;
  Offset1 |= ShaderType << 2;
  unsigned Offset = Offset0 | (Offset1 << 8);
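  // The single immediate encodes the whole ds_ordered_count request:
  //   Offset0 (bits [7:0])  = ordered-count index * 4
  //   Offset1 (bits [15:8]) = wave_release | wave_done << 1 | shader_type << 2
  //                           | instruction << 4 | (dword_count - 1) << 6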
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();

  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;

bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);
  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
  std::tie(BaseOffset, ImmOffset) =
  if (Readfirstlane) {
                                       AMDGPU::SReg_32RegClass, *MRI))
  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
    MI.eraseFromParent();
  MI.eraseFromParent();

  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;
  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
  unsigned IntrOpcode = Intr->BaseOpcode;
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  bool IsTexFail = false;
                       TFE, LWE, IsTexFail))
  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;
  if (IsA16 && !STI.hasG16() && !IsG16)
  unsigned DMaskLanes = 0;
  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
        NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;
  if (Subtarget->hasG16() && IsG16) {
      IntrOpcode = G16MappingInfo->G16;
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
       NumVAddrRegs != 1 &&
                 : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
        UseNSA ? AMDGPU::MIMGEncGfx11NSA
               : AMDGPU::MIMGEncGfx11Default,
        NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
        UseNSA ? AMDGPU::MIMGEncGfx10NSA
               : AMDGPU::MIMGEncGfx10Default,
        NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
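  // The concrete MIMG opcode is looked up from the base opcode plus the
  // encoding family for the current subtarget (GFX10/GFX11 NSA or the default
  // contiguous-VADDR encoding) and the computed VData/VAddr dword counts.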
    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
      if (!MRI->use_empty(VDataOut)) {
  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {
  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());
                STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
  MIB.addImm(IsD16 ? -1 : 0);
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
          F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
      F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
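// Uniform selects become S_CSELECT_B32/B64 reading SCC; divergent ones become
// V_CNDMASK_B32_e64, whose operand order is (false value, true value, mask),
// which is why operand 3 is added before operand 2.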
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);
    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
        And.setOperandDead(3);
        Or.setOperandDead(3);
    I.eraseFromParent();
  unsigned SubRegIdx =
  if (SubRegIdx == AMDGPU::NoSubRegister)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {
  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));

  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
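// A zero-extension can use a plain AND only when the mask value fits the
// inline immediate range of scalar/vector MOV operands, i.e. integers in
// [-16, 64]; otherwise a BFE is used instead.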
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();
  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();
    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();
    I.eraseFromParent();

  if (Shuffle->getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
  assert(Mask.size() == 2);
  if (Mask[0] == 1 && Mask[1] <= 1) {

bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
  Register Src = I.getOperand(1).getReg();
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
  I.eraseFromParent();

bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);

bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
           AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())

void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
  if (Reg.isPhysical())
  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)
  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);
  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
      CondPhysReg = AMDGPU::SCC;
      BrOpcode = AMDGPU::S_CBRANCH_SCC1;
      ConstrainRC = &AMDGPU::SReg_32RegClass;
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
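// Uniform branch conditions are copied into SCC and use S_CBRANCH_SCC1; a
// divergent condition mask is first ANDed with EXEC, copied into VCC, and
// branched on with S_CBRANCH_VCCNZ.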
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);

bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();
  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
         "ptrmask should have been narrowed during legalize");
    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();
  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
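// 64-bit G_PTRMASK on SGPRs can be a single S_AND_B64; otherwise the pointer
// and mask are split into sub0/sub1 halves, each half is ANDed (or simply
// copied when that half of the mask is all ones), and the result is rebuilt
// with REG_SEQUENCE.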
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);

bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
    MI.eraseFromParent();
  MI.eraseFromParent();
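// Dynamic vector indexing: the uniform index is copied into M0 and the element
// is read with S_MOVRELS (SGPR vectors) or V_MOVRELS_B32 (VGPR vectors); any
// constant part of the index is folded into the subregister index.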
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
  std::tie(IdxReg, SubReg) =
  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                   VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
  MI.eraseFromParent();
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();
  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
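  // The MUBUF variant is chosen from the addressing operands that are present:
  // BOTHEN (vindex + voffset), IDXEN (vindex only), OFFEN (voffset only), or
  // OFFSET (neither); the element width (ubyte/ushort/dword) comes from the
  // size operand.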
          .add(MI.getOperand(2));
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {
  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;
  MI.eraseFromParent();

  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();

bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
          .add(MI.getOperand(2));
  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  MIB.add(MI.getOperand(4))
      .add(MI.getOperand(5));
  LoadPtrI.Offset = MI.getOperand(4).getImm();
      sizeof(int32_t), Align(4));
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
  auto VDst_In = MI.getOperand(4);
  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());

bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
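// Top-level entry point: dispatch each generic opcode to the matching
// hand-written select* routine below.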
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);
  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_FNEG:
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
    Register Src, bool IsCanonicalizing, bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();
  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
  return std::pair(Src, Mods);
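// VOP3 source-modifier folding: a G_FNEG (or a non-canonicalized 0.0 - x
// G_FSUB) feeding the operand folds into the NEG modifier bit, and G_FABS into
// ABS where the instruction allows it; the underlying source register plus the
// accumulated modifier mask are returned.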
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
            TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), false);
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(), true,
  MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);
  return std::pair(Src, Mods);

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
         "expected i1 value");
3819AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
3822 "expected i1 value");
3836 switch (Elts.
size()) {
3838 DstRegClass = &AMDGPU::VReg_256RegClass;
3841 DstRegClass = &AMDGPU::VReg_128RegClass;
3844 DstRegClass = &AMDGPU::VReg_64RegClass;
3851 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
3852 .addDef(
MRI.createVirtualRegister(DstRegClass));
3853 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3864 if (ModOpcode == TargetOpcode::G_FNEG) {
3868 for (
auto El : Elts) {
3874 if (Elts.size() != NegAbsElts.
size()) {
3883 assert(ModOpcode == TargetOpcode::G_FABS);
3891AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
3896 if (
GBuildVector *BV = dyn_cast<GBuildVector>(
MRI->getVRegDef(Src))) {
3897 assert(BV->getNumSources() > 0);
3900 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
3903 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
3904 ElF32 =
MRI->getVRegDef(BV->getSourceReg(i));
3911 if (BV->getNumSources() == EltsF32.
size()) {
3922AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
3928 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
3936 if (CV->getNumSources() == EltsV2F16.
size()) {
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
  // ...
  assert(CV->getNumSources() > 0);
  // ...
  unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                           /* ... */;
  for (unsigned i = 0; i < CV->getNumSources(); ++i) {
    ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
    // ...
  }
  if (CV->getNumSources() == EltsV2F16.size()) {
    // ...
  }
  // ...
}
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  // ...
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
  // ...
}
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  // ...
  std::optional<ValueAndVReg> ShiftAmt;
  if (/* ... */
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    // ...
  }
  // ...
}
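// For SWMMAC the index operand is recovered from a shift of a 32-bit source:
// a shift amount that is a multiple of 8 selects a byte (Key = amount / 8) for
// the 8-bit index form, and a shift by exactly 16 selects the high half for
// the 16-bit form handled below.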
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  // ...
  std::optional<ValueAndVReg> ShiftAmt;
  if (/* ... */
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    // ...
  }
  // ...
}
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg());
  // ...
}
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
                                           /* ... */);
  // ...
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
                                  /*ForceVGPR=*/true));
  // ...
}
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  // ...
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root.getReg(),
                                           /* ... */);
  // ...
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
                                  /*ForceVGPR=*/true));
  // ...
}
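// The VINTERP renderers above always force the folded source into a VGPR (the
// trailing 'true' is the ForceVGPR argument to copyToVGPRIfSrcFolded), in
// addition to folding any fneg/fabs modifiers.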
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 /* ... */) const {
  // ...
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    // ...

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;
  // ...

  if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
      AddrInfo.size() > 1) {
    const GEPInfo &GEPI2 = AddrInfo[1];
    if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
      // ...
      Base = GEPI2.SgprParts[0];
      *SOffset = OffsetReg;
      // ...
      if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
        // ...
    }
  }

  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    // ...
  }

  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      /* ... */) {
    // ...
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        // ...
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    // ...
    Base = GEPI.SgprParts[0];
    *SOffset = OffsetReg;
    // ...
  }
  // ...
}
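// selectSmrdOffset covers the SMEM addressing forms: an encoded immediate
// offset, a 32-bit SGPR offset (materialized with S_MOV_B32 when it comes
// from a constant), and an SGPR plus immediate when both out-pointers are
// provided. The callers below request a specific form by passing nullptr for
// the piece they cannot use.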
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  // ...
  if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
    return std::nullopt;
  // ...
}
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  // ...
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      /* ... */;
  if (!EncodedImm)
    return std::nullopt;
  // ...
}
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  // ...
  if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
    return std::nullopt;
  // ...
}

AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  // ...
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;
  // ...
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                /* ... */) const {
  // ...
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (/* ... */
      !isFlatScratchBaseLegal(Root.getReg())))
    // ...

  return std::pair(PtrBase, ConstOffset);
}
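// selectFlatOffsetImpl tries to split the address into a pointer base plus a
// constant offset; the split is only used when the immediate is legal for the
// flat variant being selected and, for scratch accesses, when
// isFlatScratchBaseLegal confirms the base can be used directly.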
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  // ...
}

AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  // ...
}

AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  // ...
}
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;
  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    // ...
      ImmOffset = ConstOffset;
    // ...
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // ...
          int64_t SplitImmOffset, RemainderOffset;
          // ...
          if (isUInt<32>(RemainderOffset)) {
            // ...
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
            BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
                    /* ... */)
                .addImm(RemainderOffset);
            // ...
          }
        }
        // ...
        unsigned NumLiterals =
            /* ... */;
        // ...
          return std::nullopt;
      }
    // ...
  }

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // ...
    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
      // ...
    }
  }

  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // ...
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      // ...
}
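// selectGlobalSAddr matches the global SADDR form: a uniform (SGPR) base plus
// a VGPR offset and an immediate. Constant offsets that do not fit the
// immediate field are split, with the remainder materialized into a VGPR via
// V_MOV_B32 as seen above; if the base is not uniform (or is an implicit-def
// or constant), the match is rejected with std::nullopt.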
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;
  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      /* ... */) {
    // ...
    ImmOffset = ConstOffset;
  }

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    // ...
  }

  // ...
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    // ...
    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      // ...
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          // ...
    }
  }

  // ...
    return std::nullopt;
  // ...
}
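// selectScratchSAddr handles the scratch SADDR form: the base is either a
// frame index used directly or a frame index plus a uniform register, in
// which case an S_ADD_I32 is emitted to form the final SGPR address.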
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    /* ... */) const {
  // ...
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  // ...
  return (VMax & 3) + (SMax & 3) >= 4;
}
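// checkFlatScratchSVSSwizzleBug conservatively rejects SGPR/VGPR address
// pairs whose low two bits could carry into bit 2 ((VMax & 3) + (SMax & 3)
// >= 4), working around a scratch swizzling issue; selectScratchSVAddr below
// uses it before accepting the SGPR-plus-VGPR form.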
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  // ...
  int64_t ConstOffset;
  int64_t ImmOffset = 0;
  // ...
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 &&
      /* ... */) {
    // ...
    ImmOffset = ConstOffset;
  }

  // ...
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (/* ... */)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  // ...

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    // ...
  }

  // ...
    return std::nullopt;
  // ...
}
AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
  // ...
  Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
          /* ... */)
      // ...

  // ...
  std::optional<int> FI;
  // ...
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
  if (ConstOffset != 0) {
    // ...
    if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
      // ...
  } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    FI = RootDef->getOperand(1).getIndex();
  }
  // ...
}
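// The DS helpers below validate offsets: for the two-offset forms both
// offsets must be multiples of the element size and must fit in 8 bits after
// scaling, which is what isDSOffset2Legal checks.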
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                /* ... */) const {
  // ...
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 /* ... */ unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;
  // ...
}
// ...
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          /* ... */);
}

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {