#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AMDGPUInstructionSelector::AMDGPUInstructionSelector(
    const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI,
    const AMDGPUTargetMachine &TM)
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
      // ...
#include "AMDGPUGenGlobalISel.inc"
      // ...
#include "AMDGPUGenGlobalISel.inc"
{
}
static Register getWaveAddress(const MachineInstr *Def) {
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
             : Register();
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  // ...
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const TargetRegisterClass *RC =
      RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  if (RC) {
    const LLT Ty = MRI.getType(Reg);
    // ...
    // A G_TRUNC s1 result is never vcc.
    return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
           RC->hasSuperClassEq(TRI.getBoolRC());
  }

  const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
  return RB->getID() == AMDGPU::VCCRegBankID;
}
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  // ...
  if (!DstRC || DstRC != SrcRC)
    return false;
  // ...
}
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  // ...
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
      // ...
    }

    if (!isVCC(SrcReg, *MRI)) {
      // ...
      std::optional<ValueAndVReg> ConstVal =
          getIConstantVRegValWithLookThrough(SrcReg, *MRI, /*LookThroughInstrs=*/true);
      if (ConstVal) {
        unsigned MovOpc =
            STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
        BuildMI(*BB, &I, DL, TII.get(MovOpc), DstReg)
            .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      } else {
        Register MaskedReg = MRI->createVirtualRegister(SrcRC);
        // ...
        const unsigned AndOpc =
            IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        // ...
        if (IsSGPR)
          And.setOperandDead(3); // Dead scc
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
            .addImm(0)
            .addReg(MaskedReg);
      }

      if (!MRI->getRegClassOrNull(SrcReg))
        MRI->setRegClass(SrcReg, SrcRC);
      I.eraseFromParent();
      return true;
    }
    // ...
  }

  for (const MachineOperand &MO : I.operands()) {
    if (MO.getReg().isPhysical())
      continue;
    // ...
  }
  return true;
}
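// A worked illustration of the bool-to-VCC path above (added sketch, with
// made-up values; not part of the upstream file): a uniform boolean held in
// an SGPR as 0/1 must become a per-lane wavefront mask before it can feed
// VCC. Masking the source with 1 and comparing against zero per lane does
// exactly that, e.g. for a 4-lane wave:
//
//   s0 = 1                        ; scalar bool, "true"
//   v_cmp_ne_u32 vcc, 0, (s0 & 1) ; vcc = 0b1111, every active lane true
//
// whereas a constant source folds directly into S_MOV_B32/B64 of all-ones
// (for true) or zero (for false).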
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
  // ...
  const RegClassOrRegBank &RegClassOrBank =
      MRI->getRegClassOrRegBank(DefReg);
  // ...
  I.setDesc(TII.get(TargetOpcode::PHI));
  return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
}
MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
                                           const TargetRegisterClass &SubRC,
                                           unsigned SubIdx) const {
  // ...
  if (MO.isReg()) {
    Register DstReg = MRI->createVirtualRegister(&SubRC);
    // ...
    unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
    // ...
    BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
        .addReg(Reg, 0, ComposedSubIdx);
    // ...
  }
  // ...
}

static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
  switch (Opc) {
  case AMDGPU::G_AND:
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
  case AMDGPU::G_OR:
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
  case AMDGPU::G_XOR:
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
  default:
    llvm_unreachable("not a bit op");
  }
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
    return false;

  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
                            STI.isWave64());
  // ...
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  LLT Ty = MRI->getType(DstReg);
  // ...
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

  if (Size == 32) {
    if (IsSALU) {
      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
      // ...
      BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
          .add(I.getOperand(1))
          .add(I.getOperand(2));
      // ...
    }

    if (STI.hasAddNoCarry()) {
      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));
      // ...
    }

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
    // ...
    BuildMI(*BB, &I, DL, TII.get(Opc), DstReg)
        // ...
        .add(I.getOperand(1))
        .add(I.getOperand(2));
    // ...
  }

  assert(!Sub && "illegal sub should not reach here");

  const TargetRegisterClass &RC
      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
  const TargetRegisterClass &HalfRC
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

  if (IsSALU) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
        .add(Lo1)
        .add(Lo2);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
        .add(Hi1)
        .add(Hi2);
  } else {
    // ...
    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
        // ...
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(DstLo)
      .addImm(AMDGPU::sub0)
      .addReg(DstHi)
      .addImm(AMDGPU::sub1);
  // ...
}
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  // ...
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
    // ...
  }

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

  if (HasCarryIn) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());
    // ...
  }

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    // ...
  } else {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
        .addReg(AMDGPU::SCC);
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
  }
  // ...
  if (!RBI.constrainGenericRegister(/* ... */,
                                    AMDGPU::SReg_32RegClass, *MRI))
    return false;
  // ...
}
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  // ...
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;

  unsigned Opc;
  if (Subtarget->hasMADIntraFwdBug())
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
  else
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  // ...
  I.addImplicitDefUseOperands(*MF);
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)
    return false;
  // ...
  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
  if (!SrcRC)
    return false;
  // ...
  SrcReg = constrainOperandRegClass(*MF, TRI, *MRI, TII, RBI, I,
                                    *SrcRC, I.getOperand(1));
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
      .addReg(SrcReg, 0, SubReg);
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());
  // ...
  MachineInstrBuilder MIB =
      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  // ...
  const int NumDst = MI.getNumOperands() - 1;
  // ...
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  for (int I = 0, E = NumDst; I != E; ++I) {
    MachineOperand &Dst = MI.getOperand(I);
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);

    // Make sure the subregister index is valid for the source register class.
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);
    if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
      return false;
  }

  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  // ...
  LLT SrcTy = MRI->getType(Src0);
  // ...
  // BUILD_VECTOR with >= 32-bit sources is handled by MERGE_VALUES.
  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);
  }
  // ...
  assert(/* ... */ ||
         (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
          /* ... */));

  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
    return false;

  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (ConstSrc0 && ConstSrc1) {
    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();
    // ...
    MI.eraseFromParent();
    return true;
    // ...
    MI.eraseFromParent();
    return true;
  }
  // ...
  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
    MI.removeOperand(2);
    const auto &RC =
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
    // ...
  }

  if (IsVector) {
    // ...
    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
        // ...
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
        // ...
    MI.eraseFromParent();
    return true;
  }

  // ...
  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift1) {
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);
  } else if (Shift0) {
    // ...
    if (ConstSrc1 && ConstSrc1->Value == 0) {
      // build_vector_trunc (lshr $src0, 16), 0 -> s_lshr_b32 $src0, 16
      auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
          .addReg(ShiftSrc0)
          .addImm(16);
      // ...
      MI.eraseFromParent();
      return true;
    }
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
  }

  MI.setDesc(TII.get(Opc));
  return constrainSelectedInstRegOperands(MI, TII, TRI, RBI);
}
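// Added illustration (semantics paraphrased from the ISA, not from this
// file): the S_PACK_* choice mirrors which halves of the two 32-bit inputs
// hold the wanted 16-bit lanes:
//
//   S_PACK_LL_B32_B16 d, a, b  ; d = ((b & 0xffff) << 16) | (a & 0xffff)
//   S_PACK_LH_B32_B16 d, a, b  ; d = (b & 0xffff0000)     | (a & 0xffff)
//   S_PACK_HH_B32_B16 d, a, b  ; d = (b & 0xffff0000)     | (a >> 16)
//
// so a source that is already "x >> 16" can feed an H form directly and the
// explicit shift is folded away.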
bool AMDGPUInstructionSelector::selectG_PTR_ADD(MachineInstr &I) const {
  return selectG_ADD_SUB(I);
}

bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  // ...
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
      (RC && RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI))) {
    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
    return true;
  }
  return false;
}
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  // ...
  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)
    return false;
  // ...
  if (SubReg == AMDGPU::NoSubRegister)
    return false;
  // ...
  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
      .addReg(Src0Reg)
      .addReg(Src1Reg)
      .addImm(SubReg);

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  // ...
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

  assert(/* ... */ &&
         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");
  // ...
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  auto MIB = BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
                 .addReg(SrcReg)
                 .addReg(OffsetReg)
                 .addReg(WidthReg);
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
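// Added sketch (assuming 0 < w and off + w <= 32; not from the upstream
// file): V_BFE_{I,U}32 implements the generic G_SBFX/G_UBFX bitfield
// extract, which in plain C is roughly
//
//   uint32_t ubfx(uint32_t src, unsigned off, unsigned w) {
//     return (src >> off) & ((1u << w) - 1);               // zero-extended
//   }
//   int32_t sbfx(uint32_t src, unsigned off, unsigned w) {
//     return (int32_t)(src << (32 - off - w)) >> (32 - w); // sign-extended
//   }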
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  // ...
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
      // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  // ...
  Register LaneSelect = MI.getOperand(3).getReg();
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
      getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
  if (ConstSelect) {
    // The selector has to be an inline immediate, so we can use whatever for
    // the other operands.
    MIB.addImm(ConstSelect->Value.getSExtValue() &
               maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
  } else {
    std::optional<ValueAndVReg> ConstVal =
        getIConstantVRegValWithLookThrough(Val, *MRI);
    // ...
    MIB.addImm(ConstVal->Value.getSExtValue());
    // ...
    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(LaneSelect);
    // ...
  }

  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  // ...
  LLT Ty = MRI->getType(Dst0);

  unsigned Opc;
  if (Ty == LLT::scalar(32))
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
  else if (Ty == LLT::scalar(64))
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;
  else
    return false;
  // ...
  unsigned ChooseDenom = MI.getOperand(5).getImm();

  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(/* ... */);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    // ...
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg })
      MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());

    return true;
  }
  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    // ...
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_inverse_ballot:
    return selectInverseBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  default:
    return selectImpl(I, *CoverageInfo);
  }
}
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
                          const GCNSubtarget &ST) {
  if (Size == 16 && !ST.has16BitInsts())
    return -1;

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc,
                          unsigned S32Opc, unsigned S64Opc) {
    if (Size == 16)
      return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;
    if (Size == 32)
      return S32Opc;
    return S64Opc;
  };

  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
  case CmpInst::ICMP_SGT:
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
  case CmpInst::ICMP_UGT:
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);

  case CmpInst::FCMP_OEQ:
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
  case CmpInst::FCMP_OGT:
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
  case CmpInst::FCMP_OGE:
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
  case CmpInst::FCMP_OLT:
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
  case CmpInst::FCMP_OLE:
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
  case CmpInst::FCMP_ONE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_ORD:
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
  case CmpInst::FCMP_UNO:
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
  case CmpInst::FCMP_UEQ:
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
  case CmpInst::FCMP_UGT:
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
  case CmpInst::FCMP_UGE:
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
  case CmpInst::FCMP_ULT:
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
  case CmpInst::FCMP_ULE:
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
  case CmpInst::FCMP_UNE:
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
  case CmpInst::FCMP_TRUE:
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
  case CmpInst::FCMP_FALSE:
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
  }
}
int AMDGPUInstructionSelector::getS_CMPOpcode(CmpInst::Predicate P,
                                              unsigned Size) const {
  if (Size == 64) {
    // ...
    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U64;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U64;
    default:
      return -1;
    }
  }

  if (Size == 32) {
    switch (P) {
    case CmpInst::ICMP_NE:
      return AMDGPU::S_CMP_LG_U32;
    case CmpInst::ICMP_EQ:
      return AMDGPU::S_CMP_EQ_U32;
    case CmpInst::ICMP_SGT:
      return AMDGPU::S_CMP_GT_I32;
    case CmpInst::ICMP_SGE:
      return AMDGPU::S_CMP_GE_I32;
    case CmpInst::ICMP_SLT:
      return AMDGPU::S_CMP_LT_I32;
    case CmpInst::ICMP_SLE:
      return AMDGPU::S_CMP_LE_I32;
    case CmpInst::ICMP_UGT:
      return AMDGPU::S_CMP_GT_U32;
    case CmpInst::ICMP_UGE:
      return AMDGPU::S_CMP_GE_U32;
    case CmpInst::ICMP_ULT:
      return AMDGPU::S_CMP_LT_U32;
    case CmpInst::ICMP_ULE:
      return AMDGPU::S_CMP_LE_U32;
    // Scalar float compares additionally require SALU float support
    // (guard elided in this excerpt).
    case CmpInst::FCMP_OEQ:
      return AMDGPU::S_CMP_EQ_F32;
    case CmpInst::FCMP_OGT:
      return AMDGPU::S_CMP_GT_F32;
    case CmpInst::FCMP_OGE:
      return AMDGPU::S_CMP_GE_F32;
    case CmpInst::FCMP_OLT:
      return AMDGPU::S_CMP_LT_F32;
    case CmpInst::FCMP_OLE:
      return AMDGPU::S_CMP_LE_F32;
    case CmpInst::FCMP_ONE:
      return AMDGPU::S_CMP_LG_F32;
    case CmpInst::FCMP_ORD:
      return AMDGPU::S_CMP_O_F32;
    case CmpInst::FCMP_UNO:
      return AMDGPU::S_CMP_U_F32;
    case CmpInst::FCMP_UEQ:
      return AMDGPU::S_CMP_NLG_F32;
    case CmpInst::FCMP_UGT:
      return AMDGPU::S_CMP_NLE_F32;
    case CmpInst::FCMP_UGE:
      return AMDGPU::S_CMP_NLT_F32;
    case CmpInst::FCMP_ULT:
      return AMDGPU::S_CMP_NGE_F32;
    case CmpInst::FCMP_ULE:
      return AMDGPU::S_CMP_NGT_F32;
    case CmpInst::FCMP_UNE:
      return AMDGPU::S_CMP_NEQ_F32;
    default:
      return -1;
    }
  }

  // 16-bit scalar float compares (same SALU-float guard as above).
  switch (P) {
  case CmpInst::FCMP_OEQ:
    return AMDGPU::S_CMP_EQ_F16;
  case CmpInst::FCMP_OGT:
    return AMDGPU::S_CMP_GT_F16;
  case CmpInst::FCMP_OGE:
    return AMDGPU::S_CMP_GE_F16;
  case CmpInst::FCMP_OLT:
    return AMDGPU::S_CMP_LT_F16;
  case CmpInst::FCMP_OLE:
    return AMDGPU::S_CMP_LE_F16;
  case CmpInst::FCMP_ONE:
    return AMDGPU::S_CMP_LG_F16;
  case CmpInst::FCMP_ORD:
    return AMDGPU::S_CMP_O_F16;
  case CmpInst::FCMP_UNO:
    return AMDGPU::S_CMP_U_F16;
  case CmpInst::FCMP_UEQ:
    return AMDGPU::S_CMP_NLG_F16;
  case CmpInst::FCMP_UGT:
    return AMDGPU::S_CMP_NLE_F16;
  case CmpInst::FCMP_UGE:
    return AMDGPU::S_CMP_NLT_F16;
  case CmpInst::FCMP_ULT:
    return AMDGPU::S_CMP_NGE_F16;
  case CmpInst::FCMP_ULE:
    return AMDGPU::S_CMP_NGT_F16;
  case CmpInst::FCMP_UNE:
    return AMDGPU::S_CMP_NEQ_F16;
  default:
    return -1;
  }
}
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
    if (Opcode == -1)
      return false;
    // ...
    BuildMI(*BB, &I, DL, TII.get(Opcode))
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
        .addReg(AMDGPU::SCC);
    // ...
    I.eraseFromParent();
    return Ret;
  }

  if (I.getOpcode() == AMDGPU::G_FCMP)
    return false;
  // ...
  BuildMI(*BB, &I, DL, TII.get(Opcode), I.getOperand(0).getReg())
      .add(I.getOperand(2))
      .add(I.getOperand(3));
  // ...
  I.eraseFromParent();
  return Ret;
}
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
    return false;

  LLT DstTy = MRI->getType(Dst);
  // ...
  Register SrcReg = I.getOperand(2).getReg();
  // ...
  if (Pred == ICmpInst::Predicate::BAD_ICMP_PREDICATE) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();
    return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
  }
  // ...
  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
  Register Src0Reg =
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
  Register Src1Reg =
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
  if (/* opcode has src0_modifiers */)
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
  if (/* opcode has src1_modifiers */)
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;
  // ...
  std::optional<ValueAndVReg> Arg =
      getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);

  const auto BuildCopy = [&](Register SrcReg) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
        .addReg(SrcReg);
    // Widen a 32-bit mask into a 64-bit result by appending a zero high half.
    // ...
    Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        // ...
  };

  if (Arg) {
    const int64_t Value = Arg->Value.getSExtValue();
    if (Value == 0) {
      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
      BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
    } else if (Value == -1) // all ones
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    else
      return false;
  } else
    BuildCopy(I.getOperand(2).getReg());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register MaskReg = I.getOperand(2).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
      .addReg(MaskReg);
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;
  // ...
  auto RelocSymbol = cast<GlobalVariable>(/* ... */);
  // ...
  BuildMI(*BB, &I, DL,
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
      // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
    AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  // ...
  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  // ...
  unsigned Depth = I.getOperand(2).getImm();
  // ...
  if (/* nonzero depth, or no return address to read */) {
    // ...
    I.eraseFromParent();
    return true;
  }
  // ...
  Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
                                             AMDGPU::SReg_64RegClass, DL);
  // ...
  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  // ...
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  Register Reg = MI.getOperand(1).getReg();
  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
    MRI->setRegClass(Reg, TRI.getWaveMaskRegClass());
  return true;
}
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  // ...
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease)
    report_fatal_error("ds_ordered_count: wave_done requires wave_release");

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
      report_fatal_error(
          "ds_ordered_count: dword count must be between 1 and 4");
    }
  }
  // ...
  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
  // ...
  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
    Offset1 |= (CountDw - 1) << 6;

  if (STI.getGeneration() < AMDGPUSubtarget::GFX11)
    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);
  // ...
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
      // ...
  MI.eraseFromParent();
  return Ret;
}
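// Added worked example with made-up values (not from the upstream file):
// for ds_ordered_add with OrderedCountIndex = 5, WaveRelease = 1,
// WaveDone = 0 (so Instruction = 0), and ShaderType = 0 on a pre-GFX10
// target, the field packing above yields
//
//   Offset0 = 5 << 2                  = 0x14
//   Offset1 = 1 | (0 << 1) | (0 << 4) = 0x01
//   Offset  = 0x14 | (0x01 << 8)      = 0x114
//
// GFX10+ additionally folds (CountDw - 1) into bits [7:6] of Offset1.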
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
  switch (IntrID) {
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
  default:
    llvm_unreachable("not a gws intrinsic");
  }
}
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
                        !STI.hasGWSSemaReleaseAll()))
    return false;

  // intrinsic ID, vsrc, offset
  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  // ...
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
    return false;
  // ...
  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;
    // ...
  }

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
    // ...
  } else {
    std::tie(BaseOffset, ImmOffset) =
        AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);

    if (Readfirstlane) {
      // ...
    }

    if (!RBI.constrainGenericRegister(BaseOffset,
                                      AMDGPU::SReg_32RegClass, *MRI))
      return false;

    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);
  // ...
  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
  // ...
  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();
    Offset = 0;
  }
  // ...
  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(/* ... */);
}

bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
  if (/* workgroup fits in a single wave: barrier is a no-op */) {
    // ...
    MI.eraseFromParent();
    return true;
  }
  if (/* ... */) {
    // ...
    MI.eraseFromParent();
    return true;
  }
  // ...
}
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE,
                         bool &IsTexFail) {
  if (TexFailCtrl)
    IsTexFail = true;

  TFE = (TexFailCtrl & 0x1) != 0;
  TexFailCtrl &= ~(uint64_t)0x1;
  LWE = (TexFailCtrl & 0x2) != 0;
  TexFailCtrl &= ~(uint64_t)0x2;

  return TexFailCtrl == 0;
}
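// Added note (illustration, not from the upstream file): texfailctrl is a
// two-bit field, bit 0 = TFE (texture-fail enable) and bit 1 = LWE (LOD
// warning enable). TexFailCtrl == 0x3 sets both flags and leaves no residual
// bits, so the helper returns true; any higher bit makes it return false and
// the caller rejects the intrinsic as malformed.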
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  // ...
  unsigned IntrOpcode = Intr->BaseOpcode;
  // ...
  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
  // ...
  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

  // ...
  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;
  // ...
  bool IsTexFail = false;
  if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(),
                    TFE, LWE, IsTexFail))
    return false;

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  // A16 implies 16-bit gradients if the subtarget doesn't support G16.
  if (IsA16 && !STI.hasG16() && !IsG16)
    return false;

  unsigned DMask = 0;
  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    // Be careful to allow atomic swap on 16-bit element vectors.
    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      Ty.getSizeInBits() == 128 :
      Ty.getSizeInBits() == 64;

    if (BaseOpcode->AtomicX2) {
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);

      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
    } else {
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    }
  } else {
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();
    // ...
    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      // ...
    } else {
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;

      if (IsD16 && !STI.hasUnpackedD16VMem())
        NumVDataDwords = (DMaskLanes + 1) / 2;
    }
  }

  // ...
  if (Subtarget->hasG16() && IsG16) {
    // ...
    IntrOpcode = G16MappingInfo->G16; // set opcode to variant with _g16
  }

  // ...
  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
  // ...
  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    // Skip the $noregs and 0s inserted during legalization.
    MachineOperand &AddrOp = MI.getOperand(ArgOffset + I);
    if (!AddrOp.isReg())
      continue;
    // ...
    ++NumVAddrRegs;
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
  }

  // The legalizer preprocessed the intrinsic arguments. If we aren't using
  // NSA, these should have been packed into a single value in the first
  // address register.
  const bool UseNSA =
      NumVAddrRegs != 1 &&
      (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
                                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
    LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
    return false;
  }

  // ...
  if (IsGFX12Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx12,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else {
    // Pre-GFX10 encodings, tried in turn.
    // ...
    if (Opcode == -1) {
      LLVM_DEBUG(
          dbgs()
          << "requested image instruction is not supported on this GPU\n");
      return false;
    }
    // ...
    Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, /* ... */,
                                   NumVDataDwords, NumVAddrDwords);
    if (Opcode == -1)
      Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, /* ... */,
                                     NumVDataDwords, NumVAddrDwords);
  }
  // ...
  if (VDataOut) {
    if (BaseOpcode->AtomicX2) {
      const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;

      Register TmpReg = MRI->createVirtualRegister(
          Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
      unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

      MIB.addDef(TmpReg);
      if (!MRI->use_empty(VDataOut)) {
        BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), VDataOut)
            .addReg(TmpReg, RegState::Kill, SubReg);
      }
    } else {
      MIB.addDef(VDataOut); // vdata output
    }
  }

  if (VDataIn)
    MIB.addReg(VDataIn); // vdata input

  for (int I = 0; I != NumVAddrRegs; ++I) {
    MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I);
    if (SrcOp.isReg()) {
      assert(SrcOp.getReg() != 0);
      MIB.addReg(SrcOp.getReg());
    }
  }

  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
  if (BaseOpcode->Sampler)
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

  MIB.addImm(DMask); // dmask
  // ...
  MIB.addImm(IsA16 && // a16 or r128
             STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
  if (IsGFX10Plus)
    MIB.addImm(IsA16 ? -1 : 0);
  // ...
  MIB.addImm(IsD16 ? -1 : 0);
  // ...
  if (TFE) {
    assert(VDataOut && !VDataIn);
    Register Tied = MRI->cloneVirtualRegister(VDataOut);
    Register Zero = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::V_MOV_B32_e32), Zero)
        .addImm(0);
    // ...
    if (STI.usePRTStrictNull()) {
      // With enable-prt-strict-null enabled, initialize all result registers
      // to zero.
      auto RegSeq =
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts)
        RegSeq.addReg(Zero).addImm(Sub);
    } else {
      // With enable-prt-strict-null disabled, only initialize the extra
      // TFE/LWE result register.
      Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
      BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
      auto RegSeq =
          BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), Tied);
      for (auto Sub : Parts.drop_back(1))
        RegSeq.addReg(Undef).addImm(Sub);
      RegSeq.addReg(Zero).addImm(Parts.back());
    }
    // ...
  }

  MI.eraseFromParent();
  // ...
}
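// Added note (illustration, not from the upstream file): with packed D16
// memory two 16-bit channels share one 32-bit VGPR, hence the
// NumVDataDwords = (DMaskLanes + 1) / 2 above. A dmask of 0b0111 (3 lanes)
// needs ceil(3/2) = 2 dwords, whereas unpacked D16 memory still spends one
// full dword per enabled lane.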
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  // ...
  unsigned Offset = MI.getOperand(6).getImm();
  // ...
  auto MIB =
      BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
          // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
    if (!STI.hasCompressedExport()) {
      // ...
      DiagnosticInfoUnsupported NoFpRet(
          F, "intrinsic not supported on subtarget", I.getDebugLoc(),
          DS_Error);
      F.getContext().diagnose(NoFpRet);
      return false;
    }
    break;
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
  }
  return selectImpl(I, *CoverageInfo);
}
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  // ...
  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    // ...
    if (!MRI->getRegClassOrNull(CCReg))
      // ...
    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
        .add(I.getOperand(2))
        .add(I.getOperand(3));
    // ...
    I.eraseFromParent();
    return Ret;
  }
  // ...
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  // ...
  I.eraseFromParent();
  return Ret;
}

static int sizeToSubRegIndex(unsigned Size) {
  switch (Size) {
  case 32:
    return AMDGPU::sub0;
  case 64:
    return AMDGPU::sub0_sub1;
  case 96:
    return AMDGPU::sub0_sub1_sub2;
  case 128:
    return AMDGPU::sub0_sub1_sub2_sub3;
  case 256:
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
  default:
    if (Size < 32)
      return AMDGPU::sub0;
    // ...
  }
}
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (!SrcRC || !DstRC)
    return false;
  // ...
  // Truncating v2s32 to v2s16: extract the two low halves and repack.
  if (/* DstTy is v2s16 and SrcTy is v2s32 */) {
    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), LoReg)
        .addReg(SrcReg, 0, AMDGPU::sub0);
    BuildMI(*MBB, I, DL, TII.get(AMDGPU::COPY), HiReg)
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      // Write the low 16 bits of the high element into the high 16 bits of
      // the low element.
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
          // ...
    } else {
      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);

      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
          // ...

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
      // ...
      if (!IsVALU) {
        And.setOperandDead(3); // Dead scc
        Or.setOperandDead(3); // Dead scc
      }
    }

    I.eraseFromParent();
    return true;
  }
  // ...
  int SubRegIdx = sizeToSubRegIndex(DstSize);
  if (SubRegIdx == -1)
    return false;

  // Deal with weird cases where the class only partially supports the subreg
  // index.
  const TargetRegisterClass *SrcWithSubRC
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (!SrcWithSubRC)
    return false;

  if (SrcWithSubRC != SrcRC) {
    // ...
  }

  I.getOperand(1).setSubReg(SubRegIdx);
  // ...
  I.setDesc(TII.get(TargetOpcode::COPY));
  return true;
}

/// \returns true if a bitmask for \p Size bits will be an inline immediate.
static bool shouldUseAndMask(unsigned Size, unsigned &Mask) {
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
}
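// Added worked example (not from the upstream file): AMDGPU inline
// immediates cover the signed range [-16, 64], so the check above asks
// whether the zext mask fits that range when reinterpreted as signed:
//
//   unsigned Mask;
//   shouldUseAndMask(5, Mask);   // true:  Mask == 0x1f (31), inline imm
//   shouldUseAndMask(16, Mask);  // false: Mask == 0xffff needs a literal
//   shouldUseAndMask(32, Mask);  // true:  Mask == 0xffffffff reads as -1
//
// When the mask is not free, the selector prefers a BFE instead.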
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
    Register Reg, const MachineRegisterInfo &MRI,
    const TargetRegisterInfo &TRI) const {
  // ...
}

bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;
  // ...
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?
    I.getOperand(2).getImm() : SrcTy.getSizeInBits();
  // ...
  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
  // ...
  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
    if (DstSize <= 32)
      return selectCOPY(I);

    const TargetRegisterClass *SrcRC =
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
    const TargetRegisterClass *DstRC =
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    // ...
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        .addReg(SrcReg)
        .addImm(AMDGPU::sub0)
        .addReg(UndefReg)
        .addImm(AMDGPU::sub1);
    I.eraseFromParent();

    return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
           RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
  }

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
    // Try to use an AND with a mask if it will save code size.
    unsigned Mask;
    if (!Signed && shouldUseAndMask(SrcSize, Mask)) {
      // ...
      I.eraseFromParent();
      return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
    }

    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    MachineInstr *ExtI =
        BuildMI(*BB, &I, DL, TII.get(BFE), DstReg)
            .addReg(SrcReg)
            .addImm(0)       // Offset
            .addImm(SrcSize); // Width
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
  }

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
    const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
      AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
    if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
      return false;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
        AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      BuildMI(MBB, I, DL, TII.get(SextOpc), DstReg)
          .addReg(SrcReg);
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass,
                                          *MRI);
    }

    // Using a single 32-bit SALU to calculate the high half is smaller than
    // S_BFE with a literal constant operand.
    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    // Scalar BFE is encoded as S1[5:0] = offset, S1[22:16] = width.
    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      // We need a 64-bit register source, but the high bits don't matter.
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;

      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      // ...
      I.eraseFromParent();
      return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
                                          *MRI);
    }
    // ...
    I.eraseFromParent();
    return /* ... */;
  }

  return false;
}
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  // ...
  Register Dst = I.getOperand(0).getReg();
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  Register Src = I.getOperand(1).getReg();
  // ...
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
      // ...
  I.eraseFromParent();
  return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  // ...
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  // ...
  if (ImmOp.isFPImm()) {
    // ...
  } else if (ImmOp.isCImm()) {
    // ...
  }
  // ...
  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;

  unsigned Opcode;
  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
             /* ... */) {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  } else {
    Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  }

  if (Size == 32) {
    // ...
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  // 64-bit case: split into two 32-bit moves plus a REG_SEQUENCE.
  // ...
  if (IsSgpr && /* ... */) {
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
        .addImm(I.getOperand(1).getImm());
  } else {
    const TargetRegisterClass *RC = IsSgpr ?
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    // ...
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
        // ...
  }

  I.eraseFromParent();
  // ...
  const TargetRegisterClass *DstRC =
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
  // ...
}
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      /* ... */)
    return false;
  // ...
  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x80000000);

  // Set or toggle the sign bit of the high half.
  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);
  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  // ...
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
      /* ... */)
    return false;

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  // ...
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
      .addImm(0x7fffffff);

  // Clear the sign bit of the high half.
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
      .addReg(HiReg)
      .addReg(ConstReg);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
      .addReg(LoReg)
      .addImm(AMDGPU::sub0)
      .addReg(OpReg)
      .addImm(AMDGPU::sub1);

  MI.eraseFromParent();
  return true;
}
static bool isConstant(const MachineInstr &MI) {
  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}

void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
    const MachineRegisterInfo &MRI, SmallVectorImpl<GEPInfo> &AddrInfo) const {
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
  const MachineInstr *PtrMI =
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());
  // ...
  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return;
  // ...
  for (unsigned i = 1; i != 3; ++i) {
    // ...
    if (i == 2 && isConstant(/* ... */)) {
      // ...
      assert(GEPInfo.Imm == 0);
      // ...
      continue;
    }
    // ...
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
    else
      GEPInfo.VgprParts.push_back(GEPOp.getReg());
  }
  // ...
  // Recursively gather address-mode info from the base pointer.
  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
}
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())
    return false;
  // ...
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers. If Ptr is null, the
  // mem operand holds a PseudoSourceValue such as the GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
    return RBI.getRegBank(MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
           AMDGPU::SGPRRegBankID;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const {
  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
      return true;
  }
  return false;
}
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());
  // ...
  // If DS instructions require M0 initialization, set it to the maximum.
  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addImm(-1);
}

bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {
  initM0(I);
  return selectImpl(I, *CoverageInfo);
}

static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  if (Reg.isPhysical())
    return false;

  MachineInstr &MI = *MRI.getVRegDef(Reg);
  const unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::COPY)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI);

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)
    return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
           isVCmpResult(MI.getOperand(2).getReg(), MRI);

  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
}
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  // ...
  if (!isVCC(CondReg, *MRI)) {
    // ...
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;
  } else {
    // Unless CondReg is a result of a V_CMP* instruction, it should be
    // AND-ed with EXEC.
    if (!isVCmpResult(CondReg, *MRI)) {
      const bool Is64 = STI.isWave64();
      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;

      Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
          .addReg(CondReg)
          .addReg(Exec);
      CondReg = TmpReg;
    }

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();
  }

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addReg(CondReg);
  BuildMI(*BB, &I, DL, TII.get(BrOpcode))
      .addMBB(I.getOperand(1).getMBB());

  I.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
    MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
  // ...
  return RBI.constrainGenericRegister(
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
}
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);
  // ...
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  // Try to avoid emitting a bit operation when we only need to touch half of
  // the 64-bit pointer.
  // ...
  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  }

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
  const TargetRegisterClass &RegRC
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  // ...
  const TargetRegisterClass *MaskRC =
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
  // ...
  if (Ty.getSizeInBits() == 32) {
    assert(MaskTy.getSizeInBits() == 32 &&
           "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
        .addReg(SrcReg)
        .addReg(MaskReg);
    // ...
    I.eraseFromParent();
    return true;
  }

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  // Extract the subregisters from the source pointer.
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

  Register MaskedLo, MaskedHi;

  if (CanCopyLow32) {
    // If all the bits in the low half are 1, we only need a copy for it.
    MaskedLo = LoReg;
  } else {
    // Extract the mask subregister and apply the and.
    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)
        .addReg(LoReg)
        .addReg(MaskLo);
  }

  if (CanCopyHi32) {
    // If all the bits in the high half are 1, we only need a copy for it.
    MaskedHi = HiReg;
  } else {
    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)
        .addReg(HiReg)
        .addReg(MaskHi);
  }

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
      .addReg(MaskedLo)
      .addImm(AMDGPU::sub0)
      .addReg(MaskedHi)
      .addImm(AMDGPU::sub1);
  I.eraseFromParent();
  return true;
}
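// Added worked example (not from the upstream file): a typical ptrmask
// aligns a pointer downward, e.g. Mask = 0xffff'ffff'ffff'f000 for 4 KiB
// alignment. There the known-one bits (MaskOnes) cover the entire high
// dword, so CanCopyHi32 is true: the high half is simply copied through and
// only one 32-bit AND is emitted for the low half, instead of a full 64-bit
// S_AND_B64.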
/// Return the register to use for the index value, and the subregister to use
/// for the indirectly accessed register.
static std::pair<Register, unsigned>
computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI,
                        const TargetRegisterClass *SuperRC, Register IdxReg,
                        unsigned EltSize, /* ... */) {
  Register IdxBaseReg;
  int Offset;

  std::tie(IdxBaseReg, Offset) =
      AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
  if (IdxBaseReg == AMDGPU::NoRegister) {
    // This will happen if the index is a known constant. This should
    // ordinarily be legalized out, but handle it as a register just in case.
    assert(Offset == 0);
    IdxBaseReg = IdxReg;
  }

  ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);

  // Skip out-of-bounds offsets, or else we would end up using an undefined
  // register.
  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
}
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *SrcRC =
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
  const TargetRegisterClass *DstRC =
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)
    return false;
  // ...
  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    // ...
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    BuildMI(*BB, &MI, DL, TII.get(Opc), DstReg)
        .addReg(SrcReg, 0, SubReg);
    MI.eraseFromParent();
    return true;
  }
  // ...
  if (!STI.useVGPRIndexMode()) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
        .addReg(SrcReg, 0, SubReg);
    MI.eraseFromParent();
    return true;
  }

  // VGPR index mode path.
  // ...
  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
    MachineInstr &MI) const {
  // ...
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);
  // ...
  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
    return false;

  const TargetRegisterClass *VecRC =
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
  const TargetRegisterClass *ValRC =
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
  // ...
  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
    return false;
  // ...
  std::tie(IdxReg, SubReg) =
      computeIndirectRegIndex(/* ... */);

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
                         STI.useVGPRIndexMode();
  // ...
  if (!IndexMode) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
        .addReg(IdxReg);

    const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
        VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
    // ...
    MI.eraseFromParent();
    return true;
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  // The struct intrinsic variants carry one extra operand over the raw ones.
  const bool HasVIndex = MI.getNumOperands() == 9;
  Register VIndex;
  int OpOffset = 0;
  if (HasVIndex) {
    VIndex = MI.getOperand(4).getReg();
    OpOffset = 1;
  }

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
      getIConstantVRegValWithLookThrough(VOffset, *MRI);
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    break;
  case 2:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    break;
  case 4:
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
    break;
  }
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
      .add(MI.getOperand(2));
  // ...
  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
        .addReg(VIndex)
        .addImm(AMDGPU::sub0)
        .addReg(VOffset)
        .addImm(AMDGPU::sub1);
    MIB.addReg(IdxReg);
  } else if (HasVIndex) {
    MIB.addReg(VIndex);
  } else if (HasVOffset) {
    MIB.addReg(VOffset);
  }

  MIB.add(MI.getOperand(1));            // rsrc
  MIB.add(MI.getOperand(5 + OpOffset)); // soffset
  MIB.add(MI.getOperand(6 + OpOffset)); // imm offset
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();
  // ...
  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  // ...
  StorePtrI.V = nullptr;
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

/// Match a zero extend from a 32-bit value to 64 bits.
static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
  // ...
  // Match the legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0).
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
    return Register();

  assert(Def->getNumOperands() == 3 &&
         MRI.getType(Def->getOperand(0).getReg()) == LLT::scalar(64));

  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ZeroInt()))
    return Def->getOperand(1).getReg();

  return Register();
}
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  // ...
  unsigned Size = MI.getOperand(3).getImm();

  unsigned Opc;
  switch (Size) {
  default:
    return false;
  case 1:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    break;
  case 2:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    break;
  case 4:
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
    break;
  }
  // ...
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc))
      .add(MI.getOperand(2));
  // ...
  if (!isSGPR(Addr)) {
    // ...
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      // ...
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
        // ...
      }
    }
  }
  // ...
  // The saddr form still needs a vaddr operand; materialize a zero.
  VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  // ...
  MIB.add(MI.getOperand(4))  // offset
     .add(MI.getOperand(5)); // cpol
  // ...
  LoadPtrI.Offset = MI.getOperand(4).getImm();
  // ...
  MachineMemOperand *StoreMMO =
      MF->getMachineMemOperand(StorePtrI, MachineMemOperand::MOStore,
                               sizeof(int32_t), Align(4));
  // ...
  MI.eraseFromParent();
  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  unsigned Opc;
  switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
    break;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
    break;
  default:
    llvm_unreachable("unhandled smfmac intrinsic");
  }

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4); // VDst_In
  MI.removeOperand(1); // Intrinsic ID
  MI.addOperand(VDst_In); // Re-add VDst_In to the end
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
  return true;
}
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  // ...
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  // ...
  if (IsVALU) {
    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        // ...
  } else {
    // ...
  }

  const TargetRegisterClass &RC =
      IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
    return false;

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  // ...
  if (!WaveAddr) {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    // ...
  }
  // ...
  MI.eraseFromParent();
  return true;
}
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
  if (!I.isPreISelOpcode()) {
    if (I.isCopy())
      return selectCOPY(I);
    return true;
  }

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_PTR_ADD:
    if (selectImpl(I, *CoverageInfo))
      return true;
    return selectG_PTR_ADD(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    // ...
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
      return true;
    return selectImpl(I, *CoverageInfo);
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    // ...
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
    return true;
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
  case TargetOpcode::G_PHI:
    return selectPHI(I);
  default:
    // ...
    return false;
  }
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs,
                                              bool OpSel) const {
  Register Src = Root.getReg();
  unsigned Mods = 0;
  MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);

  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::NEG;
    MI = getDefIgnoringCopies(Src, *MRI);
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    // Fold fsub [+-]0 into fneg. This may not have folded depending on the
    // denormal mode, but we're implicitly canonicalizing in a source
    // operation.
    // ...
    if (LHS && LHS->isZero()) {
      Mods |= SISrcMods::NEG;
      Src = MI->getOperand(2).getReg();
    }
  }

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();
    Mods |= SISrcMods::ABS;
  }
  // ...
  return std::pair(Src, Mods);
}
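// Added worked example (not from the upstream file): for input like
// %m = G_FNEG (G_FABS %x), the walk above peels both wrappers and returns
// {%x, SISrcMods::NEG | SISrcMods::ABS}; the VOP3 instruction then encodes
// -|x| for free in its source-modifier bits instead of emitting separate
// XOR/AND instructions on the value.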
3740 Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
3742     bool ForceVGPR) const {
3743   if ((Mods != 0 || ForceVGPR) &&
3751                          TII.get(AMDGPU::COPY), VGPRSrc)

3763 AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

3770 AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
3773   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
3777         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

3786 AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
3789   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
3795         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

3804 AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

3813 AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
3816   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
3820         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

3827 AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
3831   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
3835         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

3842 AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
3845   std::tie(Src, Mods) = selectVOP3ModsImpl(Root, true,
3850         MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

3857 AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
3860   if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
3867 std::pair<Register, unsigned>
3868 AMDGPUInstructionSelector::selectVOP3PModsImpl(
3873   if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
3878     Src = MI->getOperand(1).getReg();
3879     MI = MRI.getVRegDef(Src);
3890   return std::pair(Src, Mods);

3894 AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
3900   std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

3909 AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
3915   std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

3924 AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
3929          "expected i1 value");

3939 AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
3942          "expected i1 value");
3956   switch (Elts.size()) {
3958     DstRegClass = &AMDGPU::VReg_256RegClass;
3961     DstRegClass = &AMDGPU::VReg_128RegClass;
3964     DstRegClass = &AMDGPU::VReg_64RegClass;
3971   auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
3972                  .addDef(MRI.createVirtualRegister(DstRegClass));
3973   for (unsigned i = 0; i < Elts.size(); ++i) {

3984   if (ModOpcode == TargetOpcode::G_FNEG) {
3988     for (auto El : Elts) {
3994     if (Elts.size() != NegAbsElts.size()) {
4003     assert(ModOpcode == TargetOpcode::G_FABS);

4011 AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(MachineOperand &Root) const {
4016   if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
4017     assert(BV->getNumSources() > 0);
4020     unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
4023     for (unsigned i = 0; i < BV->getNumSources(); ++i) {
4024       ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
4031     if (BV->getNumSources() == EltsF32.size()) {

4042 AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
4048   for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4056   if (CV->getNumSources() == EltsV2F16.size()) {

4068 AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(MachineOperand &Root) const {
4074   assert(CV->getNumSources() > 0);
4077   unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
4081   for (unsigned i = 0; i < CV->getNumSources(); ++i) {
4082     ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
4089   if (CV->getNumSources() == EltsV2F16.size()) {

4101 AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
4102   std::optional<FPValueAndVReg> FPValReg;
4106         MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
4126 AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
4132   std::optional<ValueAndVReg> ShiftAmt;
4134       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4135       ShiftAmt->Value.getZExtValue() % 8 == 0) {
4136     Key = ShiftAmt->Value.getZExtValue() / 8;

4147 AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
4154   std::optional<ValueAndVReg> ShiftAmt;
4156       MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
4157       ShiftAmt->Value.getZExtValue() == 16) {

4169 AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
4172   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

4182 AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
4185   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
4193             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));

4200 AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
4203   std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
4211             copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, true));
4217 bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
4227   getAddrModeInfo(*MI, *MRI, AddrInfo);
4229   if (AddrInfo.empty())
4232   const GEPInfo &GEPI = AddrInfo[0];
4233   std::optional<int64_t> EncodedImm =
4237   if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
4238       AddrInfo.size() > 1) {
4239     const GEPInfo &GEPI2 = AddrInfo[1];
4240     if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
4243         Base = GEPI2.SgprParts[0];
4244         *SOffset = OffsetReg;
4253   if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
4254     Base = GEPI.SgprParts[0];
4260   if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
4266     Base = GEPI.SgprParts[0];
4267     *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4268     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
4273   if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
4275     Base = GEPI.SgprParts[0];
4276     *SOffset = OffsetReg;

4285 AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
4288   if (!selectSmrdOffset(Root, Base, nullptr, &Offset))
4289     return std::nullopt;

4296 AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
4298   getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
4300   if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
4301     return std::nullopt;
4303   const GEPInfo &GEPInfo = AddrInfo[0];
4304   Register PtrReg = GEPInfo.SgprParts[0];
4305   std::optional<int64_t> EncodedImm =
4308     return std::nullopt;

4317 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
4319   if (!selectSmrdOffset(Root, Base, &SOffset, nullptr))
4320     return std::nullopt;

4327 AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
4330   if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
4331     return std::nullopt;
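// These renderers cover the SMRD addressing forms: an encoded immediate
// offset (selectSmrdImm / the 32-bit literal variant selectSmrdImm32), a
// register offset held in an SGPR (selectSmrdSgpr), and an SGPR base plus
// immediate (selectSmrdSgprImm).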
4338 std::pair<Register, int>
4339 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
4349   int64_t ConstOffset;
4350   std::tie(PtrBase, ConstOffset) =
4351       getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4354       !isFlatScratchBaseLegal(Root.getReg())))
4361   return std::pair(PtrBase, ConstOffset);

4365 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {

4375 AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {

4385 AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {

4396 AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
4399   int64_t ConstOffset;
4400   int64_t ImmOffset = 0;
4404   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4406   if (ConstOffset != 0) {
4410       ImmOffset = ConstOffset;
4413       if (isSGPR(PtrBaseDef->Reg)) {
4414         if (ConstOffset > 0) {
4420           int64_t SplitImmOffset, RemainderOffset;
4424           if (isUInt<32>(RemainderOffset)) {
4428                 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4430             BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4432                 .addImm(RemainderOffset);
4449   unsigned NumLiterals =
4453     return std::nullopt;
4460   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4465     if (isSGPR(SAddr)) {
4466       Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
4486   if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
4487       AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
4488     return std::nullopt;
4494   Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4496   BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
4507 AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
4510   int64_t ConstOffset;
4511   int64_t ImmOffset = 0;
4515   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4517   if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
4521     ImmOffset = ConstOffset;
4525   if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4526     int FI = AddrDef->MI->getOperand(1).getIndex();
4535   if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
4536     Register LHS = AddrDef->MI->getOperand(1).getReg();
4537     Register RHS = AddrDef->MI->getOperand(2).getReg();
4541     if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
4542         isSGPR(RHSDef->Reg)) {
4543       int FI = LHSDef->MI->getOperand(1).getIndex();
4547       SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4549       BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
4557   return std::nullopt;

4566 bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
4578   uint64_t VMax = VKnown.getMaxValue().getZExtValue();
4580   return (VMax & 3) + (SMax & 3) >= 4;
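// If the low two bits of the VGPR and SGPR addends can together carry
// (their maxima sum to 4 or more), the hardware swizzle of SVS scratch
// accesses may be computed incorrectly, so the SVS addressing form is
// rejected on subtargets with this bug.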
4584 AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
4587   int64_t ConstOffset;
4588   int64_t ImmOffset = 0;
4592   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
4595   if (ConstOffset != 0 &&
4598     ImmOffset = ConstOffset;
4602   if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
4603     return std::nullopt;
4605   Register RHS = AddrDef->MI->getOperand(2).getReg();
4607     return std::nullopt;
4609   Register LHS = AddrDef->MI->getOperand(1).getReg();
4612   if (OrigAddr != Addr) {
4613     if (!isFlatScratchBaseLegalSVImm(OrigAddr))
4614       return std::nullopt;
4616     if (!isFlatScratchBaseLegalSV(OrigAddr))
4617       return std::nullopt;
4620   if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
4621     return std::nullopt;
4623   if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4624     int FI = LHSDef->MI->getOperand(1).getIndex();
4633   return std::nullopt;
4643 AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
4652     Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4657     BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
4681   std::optional<int> FI;
4685   int64_t ConstOffset;
4686   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
4687   if (ConstOffset != 0) {
4692       if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
4698   } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
4699     FI = RootDef->getOperand(1).getIndex();
4722 bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,

4735 bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
4737                                                  unsigned Size) const {
4738   if (Offset0 % Size != 0 || Offset1 % Size != 0)
4740   if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
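// ds_read2/ds_write2 encode each offset in units of the element size, so
// both offsets must be multiples of Size and fit in an unsigned 8-bit
// field once scaled.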
4753   return Addr->getOpcode() == TargetOpcode::G_OR ||
4754          (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
4761 bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
4775   if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
4776     std::optional<ValueAndVReg> RhsValReg =
4782     if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
4783         RhsValReg->Value.getSExtValue() > -0x40000000)

4792 bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {

4810 bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
4819   std::optional<DefinitionAndSourceRegister> BaseDef =
4821   std::optional<ValueAndVReg> RHSOffset =
4831       (RHSOffset->Value.getSExtValue() < 0 &&
4832        RHSOffset->Value.getSExtValue() > -0x40000000)))
4835   Register LHS = BaseDef->MI->getOperand(1).getReg();
4836   Register RHS = BaseDef->MI->getOperand(2).getReg();

4840 bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
4841                                                     unsigned ShAmtBits) const {
4842   assert(MI.getOpcode() == TargetOpcode::G_AND);
4844   std::optional<APInt> RHS =
4849   if (RHS->countr_one() >= ShAmtBits)
4853   return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;

4857 AMDGPUInstructionSelector::selectMUBUFScratchOffset(
4862   std::optional<DefinitionAndSourceRegister> Def =
4864   assert(Def && "this shouldn't be an optional result");
4919 std::pair<Register, unsigned>
4920 AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
4923     return std::pair(Root.getReg(), 0);
4925   int64_t ConstAddr = 0;
4929     std::tie(PtrBase, Offset) =
4930         getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4933     if (isDSOffsetLegal(PtrBase, Offset)) {
4935       return std::pair(PtrBase, Offset);
4937   } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
4946   return std::pair(Root.getReg(), 0);

4950 AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
4953   std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);

4961 AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(MachineOperand &Root) const {
4962   return selectDSReadWrite2(Root, 4);

4966 AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(MachineOperand &Root) const {
4967   return selectDSReadWrite2(Root, 8);

4971 AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
4972                                               unsigned Size) const {
4975   std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);

4983 std::pair<Register, unsigned>
4984 AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
4985                                                   unsigned Size) const {
4988     return std::pair(Root.getReg(), 0);
4990   int64_t ConstAddr = 0;
4994     std::tie(PtrBase, Offset) =
4995         getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
4998     int64_t OffsetValue0 = Offset;
5000     if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
5002       return std::pair(PtrBase, OffsetValue0 / Size);
5004   } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
5012   return std::pair(Root.getReg(), 0);

5019 std::pair<Register, int64_t>
5020 AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
5023   if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
5027   std::optional<ValueAndVReg> MaybeOffset =
5043   Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5044   Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
5045   Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5046   Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
5048   B.buildInstr(AMDGPU::S_MOV_B32)
5051   B.buildInstr(AMDGPU::S_MOV_B32)
5058   B.buildInstr(AMDGPU::REG_SEQUENCE)
5061       .addImm(AMDGPU::sub0)
5063       .addImm(AMDGPU::sub1);
5067     RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
5068     B.buildInstr(AMDGPU::S_MOV_B64)
5073   B.buildInstr(AMDGPU::REG_SEQUENCE)
5076       .addImm(AMDGPU::sub0_sub1)
5078       .addImm(AMDGPU::sub2_sub3);

5085   uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5094   uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();

5101 AMDGPUInstructionSelector::MUBUFAddressData
5102 AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
5103   MUBUFAddressData Data;
5109   std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
5110   if (isUInt<32>(Offset)) {
5117     Data.N2 = InputAdd->getOperand(1).getReg();
5118     Data.N3 = InputAdd->getOperand(2).getReg();

5133 bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
5140   return N0Bank->getID() == AMDGPU::VGPRRegBankID;
5146 void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
5152   SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5153   B.buildInstr(AMDGPU::S_MOV_B32)

5159 bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
5167   MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5168   if (!shouldUseAddr64(AddrData))
5174   Offset = AddrData.Offset;
5180   if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5182     if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5195   } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
5206   splitIllegalMUBUFOffset(B, SOffset, Offset);

5210 bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
5218   MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
5219   if (shouldUseAddr64(AddrData))
5225   Offset = AddrData.Offset;
5231   splitIllegalMUBUFOffset(B, SOffset, Offset);

5236 AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
5242   if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
5258           MIB.addReg(AMDGPU::SGPR_NULL);

5272 AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
5277   if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
5288           MIB.addReg(AMDGPU::SGPR_NULL);

5300 AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
5305     SOffset = AMDGPU::SGPR_NULL;
5311 static std::optional<uint64_t>
5315   if (!OffsetVal || !isInt<32>(*OffsetVal))
5316     return std::nullopt;
5317   return Lo_32(*OffsetVal);

5321 AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
5326   std::optional<int64_t> EncodedImm =

5335 AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
5342   std::optional<int64_t> EncodedImm =

5351 AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
5359     return std::nullopt;
5361   std::optional<int64_t> EncodedOffset =
5364     return std::nullopt;

5374   if (MI->getOpcode() == AMDGPU::G_BITCAST)

5385   if (Inst->getOpcode() != AMDGPU::G_TRUNC)
5393   if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
5396     if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
5407   if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
5412     assert(Mask.size() == 2);
5414     if (Mask[0] == 1 && Mask[1] <= 1) {

5424 std::pair<Register, unsigned>
5425 AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
5426                                                      bool &Matched) const {
5431   std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
5434   if (MI->getOpcode() == AMDGPU::G_FPEXT) {
5443     if (MI->getOpcode() == AMDGPU::G_BITCAST) {
5444       MO = &MI->getOperand(1);
5449     const auto CheckAbsNeg = [&]() {
5454       std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
5476       MI = ExtractHiEltMI;
5477       MO = &MI->getOperand(0);

5490 AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
5495   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

5506 AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
5510   std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
5518 bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
5522   Register CCReg = I.getOperand(0).getReg();
5524   bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;
5527     auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
5528                        .addReg(I.getOperand(2).getReg());
5529     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
5533     BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
5534         .addImm(I.getOperand(2).getImm());
5539   I.eraseFromParent();

5545   if (HasInlineConst) {
5549     case Intrinsic::amdgcn_s_barrier_init:
5550       return AMDGPU::S_BARRIER_INIT_IMM;
5551     case Intrinsic::amdgcn_s_barrier_join:
5552       return AMDGPU::S_BARRIER_JOIN_IMM;
5553     case Intrinsic::amdgcn_s_wakeup_barrier:
5554       return AMDGPU::S_WAKEUP_BARRIER_IMM;
5555     case Intrinsic::amdgcn_s_get_barrier_state:
5556       return AMDGPU::S_GET_BARRIER_STATE_IMM;
5562     case Intrinsic::amdgcn_s_barrier_init:
5563       return AMDGPU::S_BARRIER_INIT_M0;
5564     case Intrinsic::amdgcn_s_barrier_join:
5565       return AMDGPU::S_BARRIER_JOIN_M0;
5566     case Intrinsic::amdgcn_s_wakeup_barrier:
5567       return AMDGPU::S_WAKEUP_BARRIER_M0;
5568     case Intrinsic::amdgcn_s_get_barrier_state:
5569       return AMDGPU::S_GET_BARRIER_STATE_M0;
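// Each named-barrier intrinsic has two encodings: an _IMM form used when
// the barrier operand is an inline constant, and an _M0 form that reads
// the operand from M0 when the value is only known at run time.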
5574 bool AMDGPUInstructionSelector::selectNamedBarrierInst(
5578   MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
5581   std::optional<int64_t> BarValImm =
5587   if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
5588     Register MemberCount = I.getOperand(2).getReg();
5589     TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5600   if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
5604     Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5625   if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
5626     MIB.addDef(I.getOperand(0).getReg());
5631   I.eraseFromParent();

5635 bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
5638   Register CCReg = I.getOperand(0).getReg();
5640   BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
5643   I.eraseFromParent();

5651   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5652          "Expected G_CONSTANT");
5653   MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());

5659   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5660          "Expected G_CONSTANT");
5661   MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());

5670   if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
5671     MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
5673   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
5674   MIB.addImm(Op.getCImm()->getSExtValue());

5681   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5682          "Expected G_CONSTANT");
5683   MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());

5691   MIB.addImm(MI.getOperand(OpIdx).getImm());

5697   assert(OpIdx >= 0 && "expected to match an immediate operand");

5704   assert(OpIdx >= 0 && "expected to match an immediate operand");
5705   MIB.addImm(MI.getOperand(OpIdx).getImm() &

5713   assert(OpIdx >= 0 && "expected to match an immediate operand");
5714   const bool Swizzle = MI.getOperand(OpIdx).getImm() &

5720 void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
5722   assert(OpIdx >= 0 && "expected to match an immediate operand");
5723   const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &

5738   const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
5740   assert(ExpVal != INT_MIN);

5744 bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {

5748 bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
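// Both isInlineImmediate overloads appear to defer to TII.isInlineConstant()
// to decide whether the value fits the subtarget's inline-constant
// encodings (a reading of the elided bodies, which are not shown in this
// listing).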