#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-isel"

using namespace MIPatternMatch;

#define GET_GLOBALISEL_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL
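// Past this point the file relies on the TableGen-generated matcher pulled
// in from AMDGPUGenGlobalISel.inc; the hand-written routines below cover
// the cases the imported patterns cannot express.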
    : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
#include "AMDGPUGenGlobalISel.inc"
#include "AMDGPUGenGlobalISel.inc"
  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
             ? Def->getOperand(1).getReg()
bool AMDGPUInstructionSelector::isVCC(Register Reg,
                                      const MachineRegisterInfo &MRI) const {
  auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  const LLT Ty = MRI.getType(Reg);
  return MRI.getVRegDef(Reg)->getOpcode() != AMDGPU::G_TRUNC &&
  return RB->getID() == AMDGPU::VCCRegBankID;
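// isVCC reports whether a register holds a wave-wide condition mask (the
// VCC register bank) rather than an ordinary scalar value; such registers
// need compare-based expansion instead of a plain COPY.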
bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(MachineInstr &MI,
                                                        unsigned NewOpc) const {
  MI.setDesc(TII.get(NewOpc));
  if (!DstRC || DstRC != SrcRC)
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
  I.setDesc(TII.get(TargetOpcode::COPY));
  if (isVCC(DstReg, *MRI)) {
    if (SrcReg == AMDGPU::SCC) {
    if (!isVCC(SrcReg, *MRI)) {
      std::optional<ValueAndVReg> ConstVal =
          STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
          .addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
      Register MaskedReg = MRI->createVirtualRegister(SrcRC);
          IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
        And.setOperandDead(3);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
    if (!MRI->getRegClassOrNull(SrcReg))
      MRI->setRegClass(SrcReg, SrcRC);
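// A copy of a 32-bit value into a VCC-bank register cannot stay a COPY:
// the source is masked down to bit 0 with S_AND/V_AND and compared against
// zero with V_CMP_NE_U32 so that every lane of the result mask sees the
// boolean value.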
    if (MO.getReg().isPhysical())
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
  const Register DefReg = I.getOperand(0).getReg();
  const LLT DefTy = MRI->getType(DefReg);
      MRI->getRegClassOrRegBank(DefReg);
  I.setDesc(TII.get(TargetOpcode::PHI));
                                                   unsigned SubIdx) const {
  Register DstReg = MRI->createVirtualRegister(&SubRC);
  unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
  BuildMI(*BB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
      .addReg(Reg, 0, ComposedSubIdx);
    return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
    return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
    return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
      DstRB->getID() != AMDGPU::VCCRegBankID)
  bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  LLT Ty = MRI->getType(DstReg);
  const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
  const bool Sub = I.getOpcode() == TargetOpcode::G_SUB;

      const unsigned Opc = Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
          .add(I.getOperand(1))
          .add(I.getOperand(2))

      const unsigned Opc = Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
      I.setDesc(TII.get(Opc));

    const unsigned Opc = Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
        .add(I.getOperand(1))
        .add(I.getOperand(2))

  assert(!Sub && "illegal sub should not reach here");

      = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
      = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;

  MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
  MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
  MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
  MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));

  Register DstLo = MRI->createVirtualRegister(&HalfRC);
  Register DstHi = MRI->createVirtualRegister(&HalfRC);

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)

    Register CarryReg = MRI->createVirtualRegister(CarryRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
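// When the subtarget has no single 64-bit scalar add, the operation is
// split into sub0/sub1 halves: S_ADD_U32 produces the low dword and sets
// SCC, S_ADDC_U32 consumes SCC for the high dword, and a REG_SEQUENCE
// reassembles the 64-bit result. The VALU path plays the same game with
// V_ADD_CO_U32 and a VCC-class carry register.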
bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
    MachineInstr &I) const {
  Register Dst0Reg = I.getOperand(0).getReg();
  Register Dst1Reg = I.getOperand(1).getReg();
  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
                     I.getOpcode() == AMDGPU::G_UADDE;
  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
                          I.getOpcode() == AMDGPU::G_USUBE;

  if (isVCC(Dst1Reg, *MRI)) {
    unsigned NoCarryOpc =
        IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));

  Register Src0Reg = I.getOperand(2).getReg();
  Register Src1Reg = I.getOperand(3).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
        .addReg(I.getOperand(4).getReg());

  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  auto CarryInst =
      BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
          .add(I.getOperand(2))
          .add(I.getOperand(3));

  if (MRI->use_nodbg_empty(Dst1Reg)) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
    if (!MRI->getRegClassOrNull(Dst1Reg))
      MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);

                                AMDGPU::SReg_32RegClass, *MRI))
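// On the scalar side the carry travels through the physical SCC bit: an
// explicit COPY moves the incoming carry into SCC ahead of S_ADDC/S_SUBB,
// and the carry-out is read back out of SCC afterwards.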
bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
    MachineInstr &I) const {
  const bool IsUnsigned = I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
                     : AMDGPU::V_MAD_I64_I32_gfx11_e64;
    Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
  I.setDesc(TII.get(Opc));
  I.addImplicitDefUseOperands(*MF);
bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  unsigned Offset = I.getOperand(2).getImm();
  if (Offset % 32 != 0 || DstSize > 128)

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
                                      *SrcRC, I.getOperand(1));
  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::COPY), DstReg)
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(MI.getOperand(1).getReg());

      BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
  for (int I = 0, E = MI.getNumOperands() - 1; I != E; ++I) {

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(MachineInstr &MI) const {
  const int NumDst = MI.getNumOperands() - 1;
  LLT DstTy = MRI->getType(DstReg0);
  LLT SrcTy = MRI->getType(SrcReg);

  for (int I = 0, E = NumDst; I != E; ++I) {
    BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::COPY), Dst.getReg())
        .addReg(SrcReg, 0, SubRegs[I]);
    SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[I]);

  MI.eraseFromParent();
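// Neither merge nor unmerge survives selection as a real instruction:
// G_MERGE_VALUES becomes one REG_SEQUENCE, and G_UNMERGE_VALUES becomes a
// subregister COPY per result.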
bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
  assert(MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
         MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
  LLT SrcTy = MRI->getType(Src0);

  if (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
    return selectG_MERGE_VALUES(MI);

      (MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&

  if (DstBank->getID() == AMDGPU::AGPRRegBankID)
  assert(DstBank->getID() == AMDGPU::SGPRRegBankID ||
         DstBank->getID() == AMDGPU::VGPRRegBankID);
  const bool IsVector = DstBank->getID() == AMDGPU::VGPRRegBankID;

    const int64_t K0 = ConstSrc0->Value.getSExtValue();
    const int64_t K1 = ConstSrc1->Value.getSExtValue();

    MI.eraseFromParent();
    MI.eraseFromParent();

  if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
    MI.setDesc(TII.get(AMDGPU::COPY));
        IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;

    Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    auto MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
    MIB = BuildMI(*BB, MI, DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
    MI.eraseFromParent();

  unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
  if (Shift0 && Shift1) {
    Opc = AMDGPU::S_PACK_HH_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);
    MI.getOperand(2).setReg(ShiftSrc1);
    Opc = AMDGPU::S_PACK_LH_B32_B16;
    MI.getOperand(2).setReg(ShiftSrc1);

  if (ConstSrc1 && ConstSrc1->Value == 0) {
    auto MIB = BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
    MI.eraseFromParent();
    Opc = AMDGPU::S_PACK_HL_B32_B16;
    MI.getOperand(1).setReg(ShiftSrc0);

  MI.setDesc(TII.get(Opc));
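// On the SALU path a v2i16 build_vector maps onto the S_PACK_*_B32_B16
// family: LL packs two low halves, LH/HL take one operand's high half
// (fed by a 16-bit shift that is folded away here), and HH packs both
// high halves.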
bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
  if ((!RC && !MRI->getRegBankOrNull(MO.getReg())) ||
  I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register Src0Reg = I.getOperand(1).getReg();
  Register Src1Reg = I.getOperand(2).getReg();
  LLT Src1Ty = MRI->getType(Src1Reg);

  unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
  int64_t Offset = I.getOperand(3).getImm();

  if (Offset % 32 != 0 || InsSize % 32 != 0)
  if (SubReg == AMDGPU::NoSubRegister)

  Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
  if (!Src0RC || !Src1RC)

  BuildMI(*BB, &I, DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(MachineInstr &MI) const {
  Register OffsetReg = MI.getOperand(2).getReg();
  Register WidthReg = MI.getOperand(3).getReg();

         "scalar BFX instructions are expanded in regbankselect");
  assert(MRI->getType(MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
         "64-bit vector BFX instructions are expanded in regbankselect");

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SBFX;
  unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
  Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
  Register LaneSelect = MI.getOperand(3).getReg();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);

  std::optional<ValueAndVReg> ConstSelect =
    MIB.addImm(ConstSelect->Value.getSExtValue() &

  std::optional<ValueAndVReg> ConstVal =
    MIB.addImm(ConstVal->Value.getSExtValue());

  BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  MI.eraseFromParent();
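// V_WRITELANE_B32 only encodes an immediate or M0 as the lane select, so
// a lane index that is not a known constant is first copied into M0 and
// the operand rewritten to reference M0.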
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
  LLT Ty = MRI->getType(Dst0);
    Opc = AMDGPU::V_DIV_SCALE_F32_e64;
    Opc = AMDGPU::V_DIV_SCALE_F64_e64;

  unsigned ChooseDenom = MI.getOperand(5).getImm();
  Register Src0 = ChooseDenom != 0 ? Numer : Denom;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_if_break: {
    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
        .add(I.getOperand(0))
        .add(I.getOperand(2))
        .add(I.getOperand(3));

    Register DstReg = I.getOperand(0).getReg();
    Register Src0Reg = I.getOperand(2).getReg();
    Register Src1Reg = I.getOperand(3).getReg();

    I.eraseFromParent();

    for (Register Reg : { DstReg, Src0Reg, Src1Reg })

  case Intrinsic::amdgcn_interp_p1_f16:
    return selectInterpP1F16(I);
  case Intrinsic::amdgcn_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::WQM);
  case Intrinsic::amdgcn_softwqm:
    return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
  case Intrinsic::amdgcn_strict_wwm:
  case Intrinsic::amdgcn_wwm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WWM);
  case Intrinsic::amdgcn_strict_wqm:
    return constrainCopyLikeIntrin(I, AMDGPU::STRICT_WQM);
  case Intrinsic::amdgcn_writelane:
    return selectWritelane(I);
  case Intrinsic::amdgcn_div_scale:
    return selectDivScale(I);
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp:
    return selectIntrinsicCmp(I);
  case Intrinsic::amdgcn_ballot:
    return selectBallot(I);
  case Intrinsic::amdgcn_reloc_constant:
    return selectRelocConstant(I);
  case Intrinsic::amdgcn_groupstaticsize:
    return selectGroupStaticSize(I);
  case Intrinsic::returnaddress:
    return selectReturnAddress(I);
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    return selectSMFMACIntrin(I);
  if (Size == 16 && !ST.has16BitInsts())

  const auto Select = [&](unsigned S16Opc, unsigned TrueS16Opc, unsigned S32Opc,
    return ST.hasTrue16BitInsts() ? TrueS16Opc : S16Opc;

    return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
                  AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
                  AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
    return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
                  AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
    return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
                  AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
    return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
                  AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
    return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
                  AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
    return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
                  AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
    return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
                  AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
    return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
                  AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
    return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
                  AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
    return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
                  AMDGPU::V_CMP_EQ_F32_e64, AMDGPU::V_CMP_EQ_F64_e64);
    return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
                  AMDGPU::V_CMP_GT_F32_e64, AMDGPU::V_CMP_GT_F64_e64);
    return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
                  AMDGPU::V_CMP_GE_F32_e64, AMDGPU::V_CMP_GE_F64_e64);
    return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
                  AMDGPU::V_CMP_LT_F32_e64, AMDGPU::V_CMP_LT_F64_e64);
    return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
                  AMDGPU::V_CMP_LE_F32_e64, AMDGPU::V_CMP_LE_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
                  AMDGPU::V_CMP_O_F32_e64, AMDGPU::V_CMP_O_F64_e64);
    return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
                  AMDGPU::V_CMP_U_F32_e64, AMDGPU::V_CMP_U_F64_e64);
    return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
                  AMDGPU::V_CMP_NLG_F32_e64, AMDGPU::V_CMP_NLG_F64_e64);
    return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
                  AMDGPU::V_CMP_NLE_F32_e64, AMDGPU::V_CMP_NLE_F64_e64);
    return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
                  AMDGPU::V_CMP_NLT_F32_e64, AMDGPU::V_CMP_NLT_F64_e64);
    return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
                  AMDGPU::V_CMP_NGE_F32_e64, AMDGPU::V_CMP_NGE_F64_e64);
    return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
                  AMDGPU::V_CMP_NGT_F32_e64, AMDGPU::V_CMP_NGT_F64_e64);
    return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
                  AMDGPU::V_CMP_NEQ_F32_e64, AMDGPU::V_CMP_NEQ_F64_e64);
    return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
                  AMDGPU::V_CMP_TRU_F32_e64, AMDGPU::V_CMP_TRU_F64_e64);
    return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
                  AMDGPU::V_CMP_F_F32_e64, AMDGPU::V_CMP_F_F64_e64);
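// The Select helper picks among the 16-, 32- and 64-bit forms of a VALU
// compare based on operand size, preferring the _t16 encodings on
// subtargets with true 16-bit instructions.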
                                              unsigned Size) const {
    return AMDGPU::S_CMP_LG_U64;
    return AMDGPU::S_CMP_EQ_U64;
    return AMDGPU::S_CMP_LG_U32;
    return AMDGPU::S_CMP_EQ_U32;
    return AMDGPU::S_CMP_GT_I32;
    return AMDGPU::S_CMP_GE_I32;
    return AMDGPU::S_CMP_LT_I32;
    return AMDGPU::S_CMP_LE_I32;
    return AMDGPU::S_CMP_GT_U32;
    return AMDGPU::S_CMP_GE_U32;
    return AMDGPU::S_CMP_LT_U32;
    return AMDGPU::S_CMP_LE_U32;
    return AMDGPU::S_CMP_EQ_F32;
    return AMDGPU::S_CMP_GT_F32;
    return AMDGPU::S_CMP_GE_F32;
    return AMDGPU::S_CMP_LT_F32;
    return AMDGPU::S_CMP_LE_F32;
    return AMDGPU::S_CMP_LG_F32;
    return AMDGPU::S_CMP_O_F32;
    return AMDGPU::S_CMP_U_F32;
    return AMDGPU::S_CMP_NLG_F32;
    return AMDGPU::S_CMP_NLE_F32;
    return AMDGPU::S_CMP_NLT_F32;
    return AMDGPU::S_CMP_NGE_F32;
    return AMDGPU::S_CMP_NGT_F32;
    return AMDGPU::S_CMP_NEQ_F32;
    return AMDGPU::S_CMP_EQ_F16;
    return AMDGPU::S_CMP_GT_F16;
    return AMDGPU::S_CMP_GE_F16;
    return AMDGPU::S_CMP_LT_F16;
    return AMDGPU::S_CMP_LE_F16;
    return AMDGPU::S_CMP_LG_F16;
    return AMDGPU::S_CMP_O_F16;
    return AMDGPU::S_CMP_U_F16;
    return AMDGPU::S_CMP_NLG_F16;
    return AMDGPU::S_CMP_NLE_F16;
    return AMDGPU::S_CMP_NLT_F16;
    return AMDGPU::S_CMP_NGE_F16;
    return AMDGPU::S_CMP_NGT_F16;
    return AMDGPU::S_CMP_NEQ_F16;
bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(MachineInstr &I) const {
  Register SrcReg = I.getOperand(2).getReg();
  Register CCReg = I.getOperand(0).getReg();
  if (!isVCC(CCReg, *MRI)) {
    int Opcode = getS_CMPOpcode(Pred, Size);
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
    I.eraseFromParent();

  if (I.getOpcode() == AMDGPU::G_FCMP)

              I.getOperand(0).getReg())
          .add(I.getOperand(2))
          .add(I.getOperand(3));
  I.eraseFromParent();
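// A compare with a uniform (SGPR) result selects to S_CMP_*, which writes
// the SCC bit that is then copied into the destination; a divergent result
// uses the V_CMP_* forms that produce a full wave mask.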
bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (isVCC(Dst, *MRI))
  LLT DstTy = MRI->getType(Dst);

  Register SrcReg = I.getOperand(2).getReg();

    BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
    I.eraseFromParent();

  auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
  auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
      copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, true);
      copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, true);
  SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
    SelectedMI.addImm(Src0Mods);
  SelectedMI.addReg(Src0Reg);
    SelectedMI.addImm(Src1Mods);
  SelectedMI.addReg(Src1Reg);

  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
  const bool Is64 = Size == 64;

  std::optional<ValueAndVReg> Arg =

  const auto BuildCopy = [&](Register SrcReg) {
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)

      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

    const int64_t Value = Arg->Value.getSExtValue();
    unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
    } else if (Value == -1)
      BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
    BuildCopy(I.getOperand(2).getReg());

  I.eraseFromParent();
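// Constant ballots fold away: ballot(true) is just the live-lane mask, so
// it copies EXEC; a constant-false argument is materialized with S_MOV;
// only a genuinely dynamic argument needs the compare-based path.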
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVALU = DstBank->getID() == AMDGPU::VGPRRegBankID;

  auto RelocSymbol = cast<GlobalVariable>(
          TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectGroupStaticSize(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Mov = DstRB->getID() == AMDGPU::SGPRRegBankID ?
      AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
  unsigned Depth = I.getOperand(2).getImm();
    I.eraseFromParent();
                     AMDGPU::SReg_64RegClass, DL);
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
  BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
      .add(MI.getOperand(1));

  MI.eraseFromParent();

  if (!MRI->getRegClassOrNull(Reg))
bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
    MachineInstr &MI, Intrinsic::ID IntrID) const {
  unsigned IndexOperand = MI.getOperand(7).getImm();
  bool WaveRelease = MI.getOperand(8).getImm() != 0;
  bool WaveDone = MI.getOperand(9).getImm() != 0;

  if (WaveDone && !WaveRelease)

  unsigned OrderedCountIndex = IndexOperand & 0x3f;
  IndexOperand &= ~0x3f;
  unsigned CountDw = 0;

    CountDw = (IndexOperand >> 24) & 0xf;
    IndexOperand &= ~(0xf << 24);

    if (CountDw < 1 || CountDw > 4) {
          "ds_ordered_count: dword count must be between 1 and 4");

  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;

  unsigned Offset0 = OrderedCountIndex << 2;
  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);

    Offset1 |= (CountDw - 1) << 6;
    Offset1 |= ShaderType << 2;

  unsigned Offset = Offset0 | (Offset1 << 8);

  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
  MI.eraseFromParent();
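// Worked example of the offset packing above (a sketch assuming a target
// without the CountDw field and with ShaderType == 0): for ds_ordered_add
// with index 3, wave_release = 1, wave_done = 0:
//   Offset0 = 3 << 2                   = 0xc
//   Offset1 = 1 | (0 << 1) | (0 << 4)  = 0x1
//   Offset  = 0xc | (0x1 << 8)         = 0x10c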
  case Intrinsic::amdgcn_ds_gws_init:
    return AMDGPU::DS_GWS_INIT;
  case Intrinsic::amdgcn_ds_gws_barrier:
    return AMDGPU::DS_GWS_BARRIER;
  case Intrinsic::amdgcn_ds_gws_sema_v:
    return AMDGPU::DS_GWS_SEMA_V;
  case Intrinsic::amdgcn_ds_gws_sema_br:
    return AMDGPU::DS_GWS_SEMA_BR;
  case Intrinsic::amdgcn_ds_gws_sema_p:
    return AMDGPU::DS_GWS_SEMA_P;
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
                                                     Intrinsic::ID IID) const {
  if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&

  const bool HasVSrc = MI.getNumOperands() == 3;
  assert(HasVSrc || MI.getNumOperands() == 2);

  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)

  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
    Readfirstlane = OffsetDef;

  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {

  std::tie(BaseOffset, ImmOffset) =

  if (Readfirstlane) {
                                 AMDGPU::SReg_32RegClass, *MRI))

  Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  MI.eraseFromParent();
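// GWS instructions take their offset from M0: the selected sequence
// materializes the base offset (shifted into the position the hardware
// expects) in an SGPR, copies it to M0, and folds any constant part of the
// offset into the instruction's immediate field.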
bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
                                                      bool IsAppend) const {
  Register PtrBase = MI.getOperand(2).getReg();
  LLT PtrTy = MRI->getType(PtrBase);

  std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));

  if (!isDSOffsetLegal(PtrBase, Offset)) {
    PtrBase = MI.getOperand(2).getReg();

  const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectSBarrier(MachineInstr &MI) const {
    MI.eraseFromParent();
  MI.eraseFromParent();

  TFE = (TexFailCtrl & 0x1) ? true : false;
  LWE = (TexFailCtrl & 0x2) ? true : false;

  return TexFailCtrl == 0;
bool AMDGPUInstructionSelector::selectImageIntrinsic(
    MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
  unsigned IntrOpcode = Intr->BaseOpcode;

  const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;

  int NumVDataDwords = -1;
  bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
               MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;

  Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0;

  bool IsTexFail = false;
                        TFE, LWE, IsTexFail))

  const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm();
  const bool IsA16 = (Flags & 1) != 0;
  const bool IsG16 = (Flags & 2) != 0;

  if (IsA16 && !STI.hasG16() && !IsG16)

  unsigned DMaskLanes = 0;

  if (BaseOpcode->Atomic) {
    VDataOut = MI.getOperand(0).getReg();
    VDataIn = MI.getOperand(2).getReg();
    LLT Ty = MRI->getType(VDataIn);

    const bool Is64Bit = BaseOpcode->AtomicX2 ?
      assert(MI.getOperand(3).getReg() == AMDGPU::NoRegister);
      DMask = Is64Bit ? 0xf : 0x3;
      NumVDataDwords = Is64Bit ? 4 : 2;
      DMask = Is64Bit ? 0x3 : 0x1;
      NumVDataDwords = Is64Bit ? 2 : 1;
    DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm();

    if (BaseOpcode->Store) {
      VDataIn = MI.getOperand(1).getReg();
      VDataTy = MRI->getType(VDataIn);
      VDataOut = MI.getOperand(0).getReg();
      VDataTy = MRI->getType(VDataOut);
      NumVDataDwords = DMaskLanes;
        NumVDataDwords = (DMaskLanes + 1) / 2;

  if (Subtarget->hasG16() && IsG16) {
    IntrOpcode = G16MappingInfo->G16;

  assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");

  unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();

  int NumVAddrRegs = 0;
  int NumVAddrDwords = 0;
  for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) {
    if (!AddrOp.isReg())
    NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;

      NumVAddrRegs != 1 &&
                   : NumVAddrDwords == NumVAddrRegs);
  if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {

                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX11Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx11NSA
                                          : AMDGPU::MIMGEncGfx11Default,
                                   NumVDataDwords, NumVAddrDwords);
  } else if (IsGFX10Plus) {
                                   UseNSA ? AMDGPU::MIMGEncGfx10NSA
                                          : AMDGPU::MIMGEncGfx10Default,
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);
               << "requested image instruction is not supported on this GPU\n");
                                   NumVDataDwords, NumVAddrDwords);
                                   NumVDataDwords, NumVAddrDwords);

    const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
        Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
    unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;

    if (!MRI->use_empty(VDataOut)) {

  for (int I = 0; I != NumVAddrRegs; ++I) {
    if (SrcOp.isReg()) {

  MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg());
    MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg());

          STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
    MIB.addImm(IsA16 ? -1 : 0);
  MIB.addImm(IsD16 ? -1 : 0);

  MI.eraseFromParent();
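// Image selection is mostly bookkeeping: derive DMask and the vdata dword
// count (atomics and D16 adjust it), count vaddr dwords to decide between
// packed and NSA addressing, then look the final opcode up in the MIMG
// tables for the subtarget's encoding generation.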
bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
    MachineInstr &MI) const {
  unsigned Offset = MI.getOperand(6).getImm();
  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_BVH_STACK_RTN_B32), Dst0)
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
    MachineInstr &I) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
  switch (IntrinsicID) {
  case Intrinsic::amdgcn_end_cf:
    return selectEndCfIntrinsic(I);
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap:
    return selectDSOrderedIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_gws_init:
  case Intrinsic::amdgcn_ds_gws_barrier:
  case Intrinsic::amdgcn_ds_gws_sema_v:
  case Intrinsic::amdgcn_ds_gws_sema_br:
  case Intrinsic::amdgcn_ds_gws_sema_p:
  case Intrinsic::amdgcn_ds_gws_sema_release_all:
    return selectDSGWSIntrinsic(I, IntrinsicID);
  case Intrinsic::amdgcn_ds_append:
    return selectDSAppendConsume(I, true);
  case Intrinsic::amdgcn_ds_consume:
    return selectDSAppendConsume(I, false);
  case Intrinsic::amdgcn_s_barrier:
    return selectSBarrier(I);
  case Intrinsic::amdgcn_raw_buffer_load_lds:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
  case Intrinsic::amdgcn_struct_buffer_load_lds:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
    return selectBufferLoadLds(I);
  case Intrinsic::amdgcn_global_load_lds:
    return selectGlobalLoadLds(I);
  case Intrinsic::amdgcn_exp_compr:
        F, "intrinsic not supported on subtarget", I.getDebugLoc(), DS_Error);
    F.getContext().diagnose(NoFpRet);
  case Intrinsic::amdgcn_ds_bvh_stack_rtn:
    return selectDSBvhStackIntrinsic(I);
  case Intrinsic::amdgcn_s_barrier_init:
  case Intrinsic::amdgcn_s_barrier_join:
  case Intrinsic::amdgcn_s_wakeup_barrier:
  case Intrinsic::amdgcn_s_get_barrier_state:
    return selectNamedBarrierInst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_signal_isfirst:
  case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
    return selectSBarrierSignalIsfirst(I, IntrinsicID);
  case Intrinsic::amdgcn_s_barrier_leave:
    return selectSBarrierLeave(I);
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();

  if (!isVCC(CCReg, *MRI)) {
    unsigned SelectOpcode = Size == 64 ? AMDGPU::S_CSELECT_B64 :
                                         AMDGPU::S_CSELECT_B32;
    if (!MRI->getRegClassOrNull(CCReg))
            .add(I.getOperand(2))
            .add(I.getOperand(3));
    I.eraseFromParent();

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .add(I.getOperand(3))
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  I.eraseFromParent();
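// Uniform selects become S_CSELECT with the condition in SCC; divergent
// selects become V_CNDMASK_B32, whose mask operand consumes the VCC value
// and whose two sources are added in swapped order relative to G_SELECT
// (false value first, since V_CNDMASK picks src1 when the mask bit is set).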
    return AMDGPU::sub0;
    return AMDGPU::sub0_sub1;
    return AMDGPU::sub0_sub1_sub2;
    return AMDGPU::sub0_sub1_sub2_sub3;
    return AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7;
    return AMDGPU::sub0;
bool AMDGPUInstructionSelector::selectG_TRUNC(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);

  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
  if (!SrcRC || !DstRC)

    Register LoReg = MRI->createVirtualRegister(DstRC);
    Register HiReg = MRI->createVirtualRegister(DstRC);
        .addReg(SrcReg, 0, AMDGPU::sub0);
        .addReg(SrcReg, 0, AMDGPU::sub1);

    if (IsVALU && STI.hasSDWA()) {
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)

      Register TmpReg0 = MRI->createVirtualRegister(DstRC);
      Register TmpReg1 = MRI->createVirtualRegister(DstRC);
      Register ImmReg = MRI->createVirtualRegister(DstRC);
      BuildMI(*MBB, I, DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)

      unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
      unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;

        And.setOperandDead(3);
        Or.setOperandDead(3);

    I.eraseFromParent();

  if (SubRegIdx == -1)
      = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
  if (SrcWithSubRC != SrcRC) {

  I.getOperand(1).setSubReg(SubRegIdx);
  I.setDesc(TII.get(TargetOpcode::COPY));
  Mask = maskTrailingOnes<unsigned>(Size);
  int SignedMask = static_cast<int>(Mask);
  return SignedMask >= -16 && SignedMask <= 64;
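// The mask is only profitable when it fits in an inline constant: viewed
// as a signed value the encodable range is [-16, 64], so 0x3f (63) and
// 0xffffffff (-1) pass while 0xffff (65535) does not.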
const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
bool AMDGPUInstructionSelector::selectG_SZA_EXT(MachineInstr &I) const {
  bool InReg = I.getOpcode() == AMDGPU::G_SEXT_INREG;
  bool Signed = I.getOpcode() == AMDGPU::G_SEXT || InReg;

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI->getType(DstReg);
  const LLT SrcTy = MRI->getType(SrcReg);
  const unsigned SrcSize = I.getOpcode() == AMDGPU::G_SEXT_INREG ?

  const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);

  if (I.getOpcode() == AMDGPU::G_ANYEXT) {
      return selectCOPY(I);
        TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
        TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
    Register UndefReg = MRI->createVirtualRegister(SrcRC);
    BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
      I.eraseFromParent();
    const unsigned BFE = Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
    I.eraseFromParent();

  if (SrcBank->getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
        AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;

    if (Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
      const unsigned SextOpc = SrcSize == 8 ?
          AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
      I.eraseFromParent();

    if (DstSize > 32 && SrcSize == 32) {
      Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      I.eraseFromParent();

    const unsigned BFE64 = Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
    const unsigned BFE32 = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

    if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
      Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
      Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
      BuildMI(MBB, I, DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
      I.eraseFromParent();

  I.eraseFromParent();
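// Extension strategy in brief: anyext is just a copy with undefined high
// bits, VALU extends of sub-32-bit values use V_BFE_{I,U}32, scalar sext
// to 32 bits prefers the dedicated S_SEXT_I32_I8/I16, and wider results
// fall back to the S_BFE_{I,U}{32,64} bitfield extracts.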
bool AMDGPUInstructionSelector::selectG_FPEXT(MachineInstr &I) const {
  Register Dst = I.getOperand(0).getReg();
  if (DstRB->getID() != AMDGPU::SGPRRegBankID)

  Register Src = I.getOperand(1).getReg();

  BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
  I.eraseFromParent();
bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  unsigned Size = MRI->getType(DstReg).getSizeInBits();
  } else if (ImmOp.isCImm()) {

  const bool IsSgpr = DstRB->getID() == AMDGPU::SGPRRegBankID;

  if (DstRB->getID() == AMDGPU::VCCRegBankID) {
    Opcode = STI.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  } else if (Size == 64 &&
    Opcode = IsSgpr ? AMDGPU::S_MOV_B64_IMM_PSEUDO : AMDGPU::V_MOV_B64_PSEUDO;
    I.setDesc(TII.get(Opcode));
    I.addImplicitDefUseOperands(*MF);

  Opcode = IsSgpr ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

  I.setDesc(TII.get(Opcode));
  I.addImplicitDefUseOperands(*MF);

    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
                  .addImm(I.getOperand(1).getImm());
      &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass;
    ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)

  I.eraseFromParent();
      TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();
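// A scalar 64-bit fneg only needs to flip the sign bit: the value is
// split into dwords, the high dword is XORed with the sign-bit constant
// (ORed when a fabs is folded in), and REG_SEQUENCE stitches the halves
// back together.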
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
  if (DstRB->getID() != AMDGPU::SGPRRegBankID ||

  Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(Src, 0, AMDGPU::sub0);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(Src, 0, AMDGPU::sub1);
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
  MI.eraseFromParent();

  return MI.getOpcode() == TargetOpcode::G_CONSTANT;
void AMDGPUInstructionSelector::getAddrModeInfo(const MachineInstr &Load,
  unsigned OpNo = Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
      MRI.getUniqueVRegDef(Load.getOperand(OpNo).getReg());

  if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)

  for (unsigned i = 1; i != 3; ++i) {
      assert(GEPInfo.Imm == 0);
    if (OpBank->getID() == AMDGPU::SGPRRegBankID)
      GEPInfo.SgprParts.push_back(GEPOp.getReg());
      GEPInfo.VgprParts.push_back(GEPOp.getReg());

  getAddrModeInfo(*PtrMI, MRI, AddrInfo);
bool AMDGPUInstructionSelector::isSGPR(Register Reg) const {
  return RBI.getRegBank(Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
}

bool AMDGPUInstructionSelector::isInstrUniform(const MachineInstr &MI) const {
  if (!MI.hasOneMemOperand())

  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))

  if (MI.getOpcode() == AMDGPU::G_PREFETCH)
         AMDGPU::SGPRRegBankID;
  return I && I->getMetadata("amdgpu.uniform");

  for (const GEPInfo &GEPInfo : AddrInfo) {
    if (!GEPInfo.VgprParts.empty())
void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
  const LLT PtrTy = MRI->getType(I.getOperand(1).getReg());

    BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
    MachineInstr &I) const {

  if (Reg.isPhysical())

  const unsigned Opcode = MI.getOpcode();
  if (Opcode == AMDGPU::COPY)

  if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
      Opcode == AMDGPU::G_XOR)

  if (auto *GI = dyn_cast<GIntrinsic>(&MI))
    return GI->is(Intrinsic::amdgcn_class);

  return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
  if (!isVCC(CondReg, *MRI)) {
    CondPhysReg = AMDGPU::SCC;
    BrOpcode = AMDGPU::S_CBRANCH_SCC1;
    ConstrainRC = &AMDGPU::SReg_32RegClass;

      const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
      const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
      BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)

    CondPhysReg = TRI.getVCC();
    BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
    ConstrainRC = TRI.getBoolRC();

  if (!MRI->getRegClassOrNull(CondReg))
    MRI->setRegClass(CondReg, ConstrainRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CondPhysReg)
      .addMBB(I.getOperand(1).getMBB());
  I.eraseFromParent();
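// Uniform branches route the condition through SCC and S_CBRANCH_SCC1;
// divergent ones AND the condition with EXEC into VCC and branch with
// S_CBRANCH_VCCNZ, so inactive lanes cannot influence the branch.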
bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
    MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;
  I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
      DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  Register MaskReg = I.getOperand(2).getReg();
  LLT Ty = MRI->getType(DstReg);
  LLT MaskTy = MRI->getType(MaskReg);

  const bool IsVGPR = DstRB->getID() == AMDGPU::VGPRRegBankID;

  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;

      !CanCopyLow32 && !CanCopyHi32) {
    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
    I.eraseFromParent();

  unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
      = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
      TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);

         "ptrmask should have been narrowed during legalize");

    auto NewOp = BuildMI(*BB, &I, DL, TII.get(NewOpc), DstReg)
    I.eraseFromParent();

  Register HiReg = MRI->createVirtualRegister(&RegRC);
  Register LoReg = MRI->createVirtualRegister(&RegRC);

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), LoReg)
      .addReg(SrcReg, 0, AMDGPU::sub0);
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), HiReg)
      .addReg(SrcReg, 0, AMDGPU::sub1);

    Register MaskLo = MRI->createVirtualRegister(&RegRC);
    MaskedLo = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskLo)
        .addReg(MaskReg, 0, AMDGPU::sub0);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedLo)

    Register MaskHi = MRI->createVirtualRegister(&RegRC);
    MaskedHi = MRI->createVirtualRegister(&RegRC);
    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), MaskHi)
        .addReg(MaskReg, 0, AMDGPU::sub1);
    BuildMI(*BB, &I, DL, TII.get(NewOpc), MaskedHi)

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
  I.eraseFromParent();
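// For a 64-bit ptrmask, known-bits analysis of the mask identifies halves
// that are all ones: such a half passes through as a plain subregister
// copy and only the remaining half pays for an AND before the halves are
// rejoined with REG_SEQUENCE.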
static std::pair<Register, unsigned>
  std::tie(IdxBaseReg, Offset) =
  if (IdxBaseReg == AMDGPU::NoRegister) {
    IdxBaseReg = IdxReg;

  if (static_cast<unsigned>(Offset) >= SubRegs.size())
    return std::pair(IdxReg, SubRegs[0]);
  return std::pair(IdxBaseReg, SubRegs[Offset]);
bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
    MachineInstr &MI) const {
  LLT DstTy = MRI->getType(DstReg);
  LLT SrcTy = MRI->getType(SrcReg);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
      TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
  if (!SrcRC || !DstRC)

  if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
    BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
    unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
    MI.eraseFromParent();

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
  MI.eraseFromParent();
  MI.eraseFromParent();
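// Dynamic indexing with a uniform index goes through M0 and the relative
// addressing instructions (S_MOVRELS/V_MOVRELS); any constant part of the
// index is folded into the subregister index instead.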
bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
    MachineInstr &MI) const {
  LLT VecTy = MRI->getType(DstReg);
  LLT ValTy = MRI->getType(ValReg);

  if (IdxRB->getID() != AMDGPU::SGPRRegBankID)
      TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
      TRI.getRegClassForTypeOnBank(ValTy, *ValRB);

  if (VecRB->getID() == AMDGPU::VGPRRegBankID && ValSize != 32)

  std::tie(IdxReg, SubReg) =

  const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&

  BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
      VecSize, ValSize, VecRB->getID() == AMDGPU::SGPRRegBankID);
  MI.eraseFromParent();
  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBufferLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();

  const bool HasVIndex = MI.getNumOperands() == 9;
    VIndex = MI.getOperand(4).getReg();

  Register VOffset = MI.getOperand(4 + OpOffset).getReg();
  std::optional<ValueAndVReg> MaybeVOffset =
  const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();

    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
    Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
                    : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
                                 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;

          .add(MI.getOperand(2));

  if (HasVIndex && HasVOffset) {
    Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
    BuildMI(*MBB, &*MIB, DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
  } else if (HasVIndex) {
  } else if (HasVOffset) {

  MIB.add(MI.getOperand(1));
  MIB.add(MI.getOperand(5 + OpOffset));
  MIB.add(MI.getOperand(6 + OpOffset));
  unsigned Aux = MI.getOperand(7 + OpOffset).getImm();

  LoadPtrI.Offset = MI.getOperand(6 + OpOffset).getImm();
  StorePtrI.V = nullptr;

  MI.eraseFromParent();
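// The buffer-to-LDS opcode is chosen along two axes: load width
// (UBYTE/USHORT/DWORD) and which VGPR address operands are present, where
// BOTHEN = vindex + voffset, IDXEN = vindex only, OFFEN = voffset only,
// and OFFSET = neither.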
  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
  assert(Def->getNumOperands() == 3 &&
  return Def->getOperand(1).getReg();
bool AMDGPUInstructionSelector::selectGlobalLoadLds(MachineInstr &MI) const {
  unsigned Size = MI.getOperand(3).getImm();
    Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
    Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;

          .add(MI.getOperand(2));

  if (!isSGPR(Addr)) {
    if (isSGPR(AddrDef->Reg)) {
      Addr = AddrDef->Reg;
    } else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
      if (isSGPR(SAddr)) {
        Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

    VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  MIB.add(MI.getOperand(4))
     .add(MI.getOperand(5));

  LoadPtrI.Offset = MI.getOperand(4).getImm();
                                      sizeof(int32_t), Align(4));

  MI.eraseFromParent();
bool AMDGPUInstructionSelector::selectBVHIntrinsic(MachineInstr &MI) const {
  MI.setDesc(TII.get(MI.getOperand(1).getImm()));
  MI.removeOperand(1);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const {
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
  case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
    Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
  case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
    Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
  case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
    Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;

  auto VDst_In = MI.getOperand(4);

  MI.setDesc(TII.get(Opc));
  MI.removeOperand(4);
  MI.removeOperand(1);
  MI.addOperand(VDst_In);
  MI.addImplicitDefUseOperands(*MI.getParent()->getParent());
bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
  const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

    BuildMI(*MBB, MI, DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
        IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
  MI.eraseFromParent();

bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
    WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  MI.eraseFromParent();
  if (!I.isPreISelOpcode()) {
      return selectCOPY(I);

  switch (I.getOpcode()) {
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
      return selectG_AND_OR_XOR(I);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_PTR_ADD:
      return selectG_ADD_SUB(I);
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return selectG_UADDO_USUBO_UADDE_USUBE(I);
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    return selectG_AMDGPU_MAD_64_32(I);
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_FREEZE:
    return selectCOPY(I);
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_FCONSTANT:
    return selectG_CONSTANT(I);
  case TargetOpcode::G_FNEG:
      return selectG_FNEG(I);
  case TargetOpcode::G_FABS:
      return selectG_FABS(I);
  case TargetOpcode::G_EXTRACT:
    return selectG_EXTRACT(I);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectG_MERGE_VALUES(I);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectG_UNMERGE_VALUES(I);
  case TargetOpcode::G_BUILD_VECTOR:
  case TargetOpcode::G_BUILD_VECTOR_TRUNC:
    return selectG_BUILD_VECTOR(I);
  case TargetOpcode::G_IMPLICIT_DEF:
    return selectG_IMPLICIT_DEF(I);
  case TargetOpcode::G_INSERT:
    return selectG_INSERT(I);
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
    return selectG_INTRINSIC(I);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
    return selectG_INTRINSIC_W_SIDE_EFFECTS(I);
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP:
    if (selectG_ICMP_or_FCMP(I))
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
  case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
  case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
  case TargetOpcode::G_ATOMICRMW_FADD:
  case TargetOpcode::G_ATOMICRMW_FMIN:
  case TargetOpcode::G_ATOMICRMW_FMAX:
    return selectG_LOAD_STORE_ATOMICRMW(I);
  case TargetOpcode::G_SELECT:
    return selectG_SELECT(I);
  case TargetOpcode::G_TRUNC:
    return selectG_TRUNC(I);
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_SEXT_INREG:
    return selectG_SZA_EXT(I);
  case TargetOpcode::G_FPEXT:
    if (selectG_FPEXT(I))
  case TargetOpcode::G_BRCOND:
    return selectG_BRCOND(I);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectG_GLOBAL_VALUE(I);
  case TargetOpcode::G_PTRMASK:
    return selectG_PTRMASK(I);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return selectG_EXTRACT_VECTOR_ELT(I);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return selectG_INSERT_VECTOR_ELT(I);
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    assert(Intr && "not an image intrinsic with image pseudo");
    return selectImageIntrinsic(I, Intr);
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY:
    return selectBVHIntrinsic(I);
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
    return selectG_SBFX_UBFX(I);
  case AMDGPU::G_SI_CALL:
    I.setDesc(TII.get(AMDGPU::SI_CALL));
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
    return selectWaveAddress(I);
  case AMDGPU::G_STACKRESTORE:
    return selectStackRestore(I);
    return selectPHI(I);
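// select() is the top-level dispatch for one generic instruction: opcodes
// with TableGen-importable patterns go through the generated matcher
// first, and the hand-written routines above handle the rest.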
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
                                              bool IsCanonicalizing,
                                              bool AllowAbs, bool OpSel) const {
  if (MI->getOpcode() == AMDGPU::G_FNEG) {
    Src = MI->getOperand(1).getReg();
  } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
    if (LHS && LHS->isZero()) {
      Src = MI->getOperand(2).getReg();

  if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
    Src = MI->getOperand(1).getReg();

  return std::pair(Src, Mods);
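// VOP3 source-modifier folding: a G_FNEG feeding the operand (or a
// non-canonical 0.0 - x when IsCanonicalizing is set) turns into the NEG
// modifier bit with the operand rewritten to the unnegated source, and
// G_FABS likewise becomes ABS when the instruction permits it.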
Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
    bool ForceVGPR) const {
  if ((Mods != 0 || ForceVGPR) &&
                  TII.get(AMDGPU::COPY), VGPRSrc)

AMDGPUInstructionSelector::selectVSRC0(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3OMods(MachineOperand &Root) const {

AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, false);
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root, true,
        MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));

AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
  if (Def->getOpcode() == AMDGPU::G_FNEG || Def->getOpcode() == AMDGPU::G_FABS)
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PModsImpl(
  if (MI && MI->getOpcode() == AMDGPU::G_FNEG &&
    Src = MI->getOperand(1).getReg();
    MI = MRI.getVRegDef(Src);

  return std::pair(Src, Mods);

AMDGPUInstructionSelector::selectVOP3PMods(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI);

AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
  std::tie(Src, Mods) = selectVOP3PModsImpl(Root.getReg(), MRI, true);

AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
         "expected i1 value");
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
         "expected i1 value");

  switch (Elts.size()) {
    DstRegClass = &AMDGPU::VReg_256RegClass;
    DstRegClass = &AMDGPU::VReg_128RegClass;
    DstRegClass = &AMDGPU::VReg_64RegClass;

  auto MIB = B.buildInstr(AMDGPU::REG_SEQUENCE)
                 .addDef(MRI.createVirtualRegister(DstRegClass));
  for (unsigned i = 0; i < Elts.size(); ++i) {

  if (ModOpcode == TargetOpcode::G_FNEG) {
    for (auto El : Elts) {
    if (Elts.size() != NegAbsElts.size()) {
    assert(ModOpcode == TargetOpcode::G_FABS);
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
    MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsF32;

  if (GBuildVector *BV = dyn_cast<GBuildVector>(MRI->getVRegDef(Src))) {
    assert(BV->getNumSources() > 0);
    // Based on the first element decide which modifier to match, neg or abs.
    MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
    unsigned ModOpcode = (ElF32->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;
    for (unsigned i = 0; i < BV->getNumSources(); ++i) {
      ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
      if (ElF32->getOpcode() != ModOpcode)
        break;
      EltsF32.push_back(ElF32->getOperand(1).getReg());
    }

    // All elements had the ModOpcode modifier.
    if (BV->getNumSources() == EltsF32.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16Neg(MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      Register FNegSrc;
      if (!mi_match(CV->getSourceReg(i), *MRI, m_GFNeg(m_Reg(FNegSrc))))
        break;
      EltsV2F16.push_back(FNegSrc);
    }

    // All elements had a G_FNEG modifier.
    if (CV->getNumSources() == EltsV2F16.size()) {
      Mods |= SISrcMods::NEG;
      Mods |= SISrcMods::NEG_HI;
      Src = buildRegSequence(EltsV2F16, Root.getParent(), *MRI);
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
    MachineOperand &Root) const {
  Register Src = Root.getReg();
  unsigned Mods = SISrcMods::OP_SEL_1;
  SmallVector<Register, 8> EltsV2F16;

  if (GConcatVectors *CV = dyn_cast<GConcatVectors>(MRI->getVRegDef(Src))) {
    assert(CV->getNumSources() > 0);
    MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
    // Based on the first element decide which modifier to match, neg or abs.
    unsigned ModOpcode = (ElV2F16->getOpcode() == AMDGPU::G_FNEG)
                             ? AMDGPU::G_FNEG
                             : AMDGPU::G_FABS;

    for (unsigned i = 0; i < CV->getNumSources(); ++i) {
      ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
      if (ElV2F16->getOpcode() != ModOpcode)
        break;
      EltsV2F16.push_back(ElV2F16->getOperand(1).getReg());
    }

    // All elements had the ModOpcode modifier.
    if (CV->getNumSources() == EltsV2F16.size())
      selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, Root.getParent(),
                           *MRI);
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAVISrc(MachineOperand &Root) const {
  std::optional<FPValueAndVReg> FPValReg;
  if (mi_match(Root.getReg(), *MRI, m_GFCstOrSplat(FPValReg))) {
    if (TII.isInlineConstant(FPValReg->Value.bitcastToAPInt())) {
      return {{[=](MachineInstrBuilder &MIB) {
        MIB.addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
      }}};
    }
  }

  return std::nullopt;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex8(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() % 8 == 0) {
    Key = ShiftAmt->Value.getZExtValue() / 8;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSWMMACIndex16(MachineOperand &Root) const {
  Register Src =
      getDefIgnoringCopies(Root.getReg(), *MRI)->getOperand(0).getReg();
  unsigned Key = 0;

  Register ShiftSrc;
  std::optional<ValueAndVReg> ShiftAmt;
  if (mi_match(Src, *MRI, m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt))) &&
      MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
      ShiftAmt->Value.getZExtValue() == 16) {
    Key = 1;
    Src = ShiftSrc;
  }

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Key); }}};
}
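// Example of the match above: an index expressed as (G_LSHR %packed, 16)
// selects the second 16-bit element of %packed, so the shift source is used
// directly as the index register with Key = 1; an unshifted value selects
// element 0. The Index8 variant works the same way in byte granularity.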
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3OpSelMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  // FIXME: Handle op_sel.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/false);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src0_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
                                           /*IsCanonicalizing=*/true,
                                           /*AllowAbs=*/false,
                                           /*OpSel=*/true);

  return {{
      [=](MachineInstrBuilder &MIB) {
        MIB.addReg(
            copyToVGPRIfSrcFolded(Src, Mods, Root, MIB, /*ForceVGPR=*/true));
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src0_mods
  }};
}
bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
                                                 Register &Base,
                                                 Register *SOffset,
                                                 int64_t *Offset) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();

  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*MI, *MRI, AddrInfo);

  if (AddrInfo.empty())
    return false;

  const GEPInfo &GEPI = AddrInfo[0];
  std::optional<int64_t> EncodedImm;

  if (SOffset && Offset) {
    EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                              /*HasSOffset=*/true);
    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
        AddrInfo.size() > 1) {
      const GEPInfo &GEPI2 = AddrInfo[1];
      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
        if (Register OffsetReg =
                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
          Base = GEPI2.SgprParts[0];
          *SOffset = OffsetReg;
          *Offset = *EncodedImm;
          if (*Offset >= 0 || !AMDGPU::hasSMRDSignedImmOffset(STI))
            return true;

          // For unbuffered smem loads, it is illegal for the Immediate Offset
          // to be negative if the resulting (Offset + (M0 or SOffset or zero))
          // is negative.
          auto SKnown = KB->getKnownBits(*SOffset);
          if (*Offset + SKnown.getMinValue().getSExtValue() < 0)
            return false;

          return true;
        }
      }
    }
  }

  EncodedImm = AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, /*IsBuffer=*/false,
                                            /*HasSOffset=*/false);
  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
    Base = GEPI.SgprParts[0];
    *Offset = *EncodedImm;
    return true;
  }

  // SGPR offset is unsigned.
  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
      GEPI.Imm != 0) {
    // If we make it this far we have a load with a 32-bit immediate offset.
    // It is OK to select this using an sgpr offset, because we have already
    // failed trying to select this load into one of the _IMM variants since
    // the _IMM patterns are considered before the _SGPR patterns.
    Base = GEPI.SgprParts[0];
    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
        .addImm(GEPI.Imm);
    return true;
  }

  if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
      Base = GEPI.SgprParts[0];
      *SOffset = OffsetReg;
      return true;
    }
  }

  return false;
}
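// The SMRD addressing forms tried above, in order: sgpr base + sgpr soffset
// + encoded immediate (when two stacked GEPs are available), sgpr base +
// encoded immediate only, an immediate too wide to encode moved into an
// soffset register via S_MOV_B32, and finally sgpr base + a zero-extended
// 32-bit sgpr offset.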
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
  Register Base;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, /*SOffset=*/nullptr, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdImm32(MachineOperand &Root) const {
  SmallVector<GEPInfo, 4> AddrInfo;
  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);

  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
    return std::nullopt;

  const GEPInfo &GEPInfo = AddrInfo[0];
  Register PtrReg = GEPInfo.SgprParts[0];
  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, GEPInfo.Imm);
  if (!EncodedImm)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
  Register Base, SOffset;
  if (!selectSmrdOffset(Root, Base, &SOffset, /*Offset=*/nullptr))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
  Register Base, SOffset;
  int64_t Offset;
  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
}
std::pair<Register, int>
AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
                                                uint64_t FlatVariant) const {
  MachineInstr *MI = Root.getParent();

  auto Default = std::pair(Root.getReg(), 0);

  if (!STI.hasFlatInstOffsets())
    return Default;

  Register PtrBase;
  int64_t ConstOffset;
  std::tie(PtrBase, ConstOffset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
                           !isFlatScratchBaseLegal(Root.getReg())))
    return Default;

  unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
    return Default;

  return std::pair(PtrBase, ConstOffset);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
  auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FLAT);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
  auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatGlobal);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
  auto PtrWithOffset = selectFlatOffsetImpl(Root, SIInstrFlags::FlatScratch);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); }
  }};
}
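// Example of the fold performed here: a (load (ptr_add %base, 44)) becomes a
// FLAT/GLOBAL/SCRATCH access of %base with inst_offset = 44, provided 44 is
// legal for the given FlatVariant on this subtarget per TII.isLegalFLATOffset.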
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0) {
    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
                              SIInstrFlags::FlatGlobal)) {
      Addr = PtrBase;
      ImmOffset = ConstOffset;
    } else {
      auto PtrBaseDef = getDefSrcRegIgnoringCopies(PtrBase, *MRI);
      if (isSGPR(PtrBaseDef->Reg)) {
        if (ConstOffset > 0) {
          // Offset is too large.
          //
          // saddr + large_offset -> saddr +
          //                         (voffset = large_offset & ~MaxOffset) +
          //                         (large_offset & MaxOffset)
          int64_t SplitImmOffset, RemainderOffset;
          std::tie(SplitImmOffset, RemainderOffset) = TII.splitFlatOffset(
              ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

          if (isUInt<32>(RemainderOffset)) {
            MachineInstr *MI = Root.getParent();
            MachineBasicBlock *MBB = MI->getParent();
            Register HighBits =
                MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

            BuildMI(*MBB, MI, MI->getDebugLoc(),
                    TII.get(AMDGPU::V_MOV_B32_e32), HighBits)
                .addImm(RemainderOffset);

            return {{
                [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrBase); }, // saddr
                [=](MachineInstrBuilder &MIB) {
                  MIB.addReg(HighBits);
                }, // voffset
                [=](MachineInstrBuilder &MIB) { MIB.addImm(SplitImmOffset); }
            }};
          }
        }

        // We are adding a 64-bit SGPR and a constant; reject the SADDR form
        // if materializing the constant would need more literals than the
        // constant bus allows.
        unsigned NumLiterals =
            !TII.isInlineConstant(APInt(32, Lo_32(ConstOffset))) +
            !TII.isInlineConstant(APInt(32, Hi_32(ConstOffset)));
        if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
          return std::nullopt;
      }
    }
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    // Look through the SGPR->VGPR copy.
    Register SAddr =
        getSrcRegIgnoringCopies(AddrDef->MI->getOperand(1).getReg(), *MRI);

    if (isSGPR(SAddr)) {
      Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();

      // It's possible voffset is an SGPR here, but the copy to VGPR will be
      // inserted later.
      if (Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset)) {
        return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },
                 [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },
                 [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }}};
      }
    }
  }

  // FIXME: We should probably have folded COPY (G_IMPLICIT_DEF) earlier, and
  // drop this.
  if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
      AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
    return std::nullopt;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
      .addImm(0);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(AddrDef->Reg); }, // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(VOffset); },      // voffset
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }     // offset
  }};
}
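// Resulting operand shape, roughly: saddr is a uniform 64-bit SGPR base,
// voffset a 32-bit zero-extended VGPR, and offset a signed immediate whose
// width depends on the generation. When only a uniform address is available,
// a zero voffset is materialized because one V_MOV_B32 is cheaper than the
// two moves needed to copy a 64-bit SGPR pair into VGPRs.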
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = AddrDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  Register SAddr = AddrDef->Reg;

  if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
    Register LHS = AddrDef->MI->getOperand(1).getReg();
    Register RHS = AddrDef->MI->getOperand(2).getReg();
    auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
    auto RHSDef = getDefSrcRegIgnoringCopies(RHS, *MRI);

    if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
        isSGPR(RHSDef->Reg)) {
      int FI = LHSDef->MI->getOperand(1).getIndex();
      MachineInstr &I = *Root.getParent();
      MachineBasicBlock *BB = I.getParent();
      const DebugLoc &DL = I.getDebugLoc();
      SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);

      BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
          .addFrameIndex(FI)
          .addReg(RHSDef->Reg)
          .setOperandDead(3); // Dead scc
    }
  }

  if (!isSGPR(SAddr))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(SAddr); },    // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
    Register VAddr, Register SAddr, uint64_t ImmOffset) const {
  if (!STI.hasFlatScratchSVSSwizzleBug())
    return false;

  // The bug affects the swizzling of SVS accesses if there is any carry out
  // from the two low order bits (i.e. from bit 1 into bit 2) when adding
  // voffset to (soffset + inst_offset).
  auto VKnown = KB->getKnownBits(VAddr);
  auto SKnown = KnownBits::computeForAddSub(
      /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KB->getKnownBits(SAddr),
      KnownBits::makeConstant(APInt(32, ImmOffset)));
  uint64_t VMax = VKnown.getMaxValue().getZExtValue();
  uint64_t SMax = SKnown.getMaxValue().getZExtValue();
  return (VMax & 3) + (SMax & 3) >= 4;
}
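// Worked example: with ImmOffset = 2 and SAddr's known maximum ending in
// 0b01, (soffset + inst_offset) can end in 0b11, so SMax & 3 == 3; if VAddr's
// known maximum ends in 0b01 (VMax & 3 == 1), then 3 + 1 >= 4 means a carry
// out of bit 1 cannot be ruled out, and the SVS form is rejected on
// subtargets that have the swizzle bug.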
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
  Register Addr = Root.getReg();
  Register PtrBase;
  int64_t ConstOffset;
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.
  std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

  Register OrigAddr = Addr;
  if (ConstOffset != 0 &&
      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
                            SIInstrFlags::FlatScratch)) {
    Addr = PtrBase;
    ImmOffset = ConstOffset;
  }

  auto AddrDef = getDefSrcRegIgnoringCopies(Addr, *MRI);
  if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
    return std::nullopt;

  Register RHS = AddrDef->MI->getOperand(2).getReg();
  if (RBI.getRegBank(RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
    return std::nullopt;

  Register LHS = AddrDef->MI->getOperand(1).getReg();
  auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

  if (OrigAddr != Addr) {
    if (!isFlatScratchBaseLegalSVImm(OrigAddr))
      return std::nullopt;
  } else {
    if (!isFlatScratchBaseLegalSV(OrigAddr))
      return std::nullopt;
  }

  if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
    return std::nullopt;

  if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
    int FI = LHSDef->MI->getOperand(1).getIndex();
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },       // vaddr
        [=](MachineInstrBuilder &MIB) { MIB.addFrameIndex(FI); }, // saddr
        [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); }  // offset
    }};
  }

  if (!isSGPR(LHS))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(RHS); },      // vaddr
      [=](MachineInstrBuilder &MIB) { MIB.addReg(LHS); },      // saddr
      [=](MachineInstrBuilder &MIB) { MIB.addImm(ImmOffset); } // offset
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffen(
    MachineOperand &Root) const {
  MachineInstr *MI = Root.getParent();
  MachineBasicBlock *MBB = MI->getParent();
  MachineFunction *MF = MBB->getParent();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  int64_t Offset = 0;
  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
      !TII.isLegalMUBUFImmOffset(Offset)) {
    // The constant offset does not fit in the immediate field. Materialize
    // the high bits with a V_MOV and keep only the low bits as the
    // instruction's immediate offset.
    Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    const unsigned MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset(STI);

    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
            HighBits)
        .addImm(Offset & ~MaxOffset);

    return {{[=](MachineInstrBuilder &MIB) { // rsrc
               MIB.addReg(Info->getScratchRSrcReg());
             },
             [=](MachineInstrBuilder &MIB) { // vaddr
               MIB.addReg(HighBits);
             },
             [=](MachineInstrBuilder &MIB) { // soffset
               MIB.addImm(0);
             },
             [=](MachineInstrBuilder &MIB) { // offset
               MIB.addImm(Offset & MaxOffset);
             }}};
  }

  std::optional<int> FI;
  Register VAddr = Root.getReg();

  if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) {
    Register PtrBase;
    int64_t ConstOffset;
    std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI);
    if (ConstOffset != 0) {
      if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
          (!STI.privateMemoryResourceIsRangeChecked() ||
           KB->signBitIsZero(PtrBase))) {
        const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
        if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
          FI = PtrBaseDef->getOperand(1).getIndex();
        else
          VAddr = PtrBase;
        Offset = ConstOffset;
      }
    } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) {
      FI = RootDef->getOperand(1).getIndex();
    }
  }

  return {{[=](MachineInstrBuilder &MIB) { // rsrc
             MIB.addReg(Info->getScratchRSrcReg());
           },
           [=](MachineInstrBuilder &MIB) { // vaddr
             if (FI)
               MIB.addFrameIndex(*FI);
             else
               MIB.addReg(VAddr);
           },
           [=](MachineInstrBuilder &MIB) { // soffset
             // Use constant zero for soffset and rely on eliminateFrameIndex
             // to choose the appropriate frame register if need be.
             MIB.addImm(0);
           },
           [=](MachineInstrBuilder &MIB) { // offset
             MIB.addImm(Offset);
           }}};
}
bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
                                                int64_t Offset) const {
  if (!isUInt<16>(Offset))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an
  // offset don't seem to work.
  return KB->signBitIsZero(Base);
}

bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
                                                 int64_t Offset1,
                                                 unsigned Size) const {
  if (Offset0 % Size != 0 || Offset1 % Size != 0)
    return false;
  if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size))
    return false;

  if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands instructions with a negative base value and an
  // offset don't seem to work.
  return KB->signBitIsZero(Base);
}
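// Background for the checks above: the DS2 instructions (ds_read2_b32,
// ds_write2_b64, etc.) encode two independent 8-bit offsets in units of the
// element size, so a b32 pair can address 0..255 in 4-byte steps; both raw
// byte offsets must therefore divide evenly by Size and fit in 8 bits after
// scaling.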
// Return whether the address computation is known not to step backwards:
// either a G_OR used as an add of disjoint bits, or a G_PTR_ADD flagged nuw.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}

// Check whether the base address of a flat scratch access is known to be
// non-negative (required before GFX12, where the offset fields are unsigned).
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  if (isNoUnsignedWrap(AddrMI))
    return true;

  if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
    std::optional<ValueAndVReg> RhsValReg = getIConstantVRegValWithLookThrough(
        AddrMI->getOperand(2).getReg(), *MRI);
    // If the immediate offset is negative and within a certain range, the
    // base address cannot also be negative, otherwise the sum would wrap far
    // outside the scratch aperture.
    if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
        RhsValReg->Value.getSExtValue() > -0x40000000)
      return true;
  }

  return KB->signBitIsZero(Addr);
}

// Check that both the SGPR base and the VGPR offset of an SV scratch access
// are known non-negative.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  if (STI.hasSignedScratchOffsets())
    return true;

  MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);
  if (isNoUnsignedWrap(AddrMI))
    return true;

  Register LHS = AddrMI->getOperand(1).getReg();
  Register RHS = AddrMI->getOperand(2).getReg();
  return KB->signBitIsZero(LHS) && KB->signBitIsZero(RHS);
}

// As above, but for an SVS access whose immediate offset has already been
// split off the pointer expression.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  // All externally used flat scratch offsets are signed on GFX12+.
  if (STI.hasSignedScratchOffsets())
    return true;

  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Addr, *MRI);
  std::optional<ValueAndVReg> RHSOffset = getIConstantVRegValWithLookThrough(
      BaseDef->MI->getOperand(2).getReg(), *MRI);
  assert(RHSOffset);

  // If the immediate offset is negative and within a certain range, the base
  // address cannot also be negative, so the pair is legal.
  if (isNoUnsignedWrap(BaseDef->MI) ||
      (RHSOffset->Value.getSExtValue() < 0 &&
       RHSOffset->Value.getSExtValue() > -0x40000000))
    return true;

  Register LHS = BaseDef->MI->getOperand(1).getReg();
  Register RHS = BaseDef->MI->getOperand(2).getReg();
  return KB->signBitIsZero(LHS) && KB->signBitIsZero(RHS);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
                                                    unsigned ShAmtBits) const {
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  std::optional<APInt> RHS =
      getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
  if (!RHS)
    return false;

  if (RHS->countr_one() >= ShAmtBits)
    return true;

  const APInt LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
  return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
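// Example: for a 32-bit shift, ShAmtBits == 5 and "(G_AND %x, 31)" as the
// shift amount is redundant, because the hardware shift only reads the low
// 5 bits of the amount anyway; the AND is skipped and %x is used directly.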
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
    MachineOperand &Root) const {
  Register Reg = Root.getReg();
  MachineFunction *MF = Root.getParent()->getMF();
  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();

  std::optional<DefinitionAndSourceRegister> Def =
      getDefSrcRegIgnoringCopies(Reg, *MRI);
  assert(Def && "this shouldn't be an optional result");
  Reg = Def->Reg;

  if (Register WaveBase = getWaveAddress(Def->MI)) {
    return {{
        [=](MachineInstrBuilder &MIB) { // rsrc
          MIB.addReg(Info->getScratchRSrcReg());
        },
        [=](MachineInstrBuilder &MIB) { // soffset
          MIB.addReg(WaveBase);
        },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // offset
    }};
  }

  int64_t Offset = 0;

  // FIXME: Copy check is a hack.
  Register BasePtr;
  if (mi_match(Reg, *MRI,
               m_GPtrAdd(m_Reg(BasePtr),
                         m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) {
    if (!TII.isLegalMUBUFImmOffset(Offset))
      return {};
    MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI);
    Register WaveBase = getWaveAddress(BasePtrDef);
    if (!WaveBase)
      return {};

    return {{
        [=](MachineInstrBuilder &MIB) { // rsrc
          MIB.addReg(Info->getScratchRSrcReg());
        },
        [=](MachineInstrBuilder &MIB) { // soffset
          MIB.addReg(WaveBase);
        },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
    }};
  }

  if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
      !TII.isLegalMUBUFImmOffset(Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { // rsrc
        MIB.addReg(Info->getScratchRSrcReg());
      },
      [=](MachineInstrBuilder &MIB) { // soffset
        MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
  }};
}
std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
    MachineOperand &Root) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    if (isDSOffsetLegal(PtrBase, Offset)) {
      // (add n0, c0)
      return std::pair(PtrBase, Offset);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
  }};
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 4);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
    MachineOperand &Root) const {
  return selectDSReadWrite2(Root, 8);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectDSReadWrite2(MachineOperand &Root,
                                              unsigned Size) const {
  Register Reg;
  unsigned Offset;
  std::tie(Reg, Offset) = selectDSReadWrite2Impl(Root, Size);
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset + 1); }
  }};
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectDSReadWrite2Impl(MachineOperand &Root,
                                                  unsigned Size) const {
  const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
  if (!RootDef)
    return std::pair(Root.getReg(), 0);

  int64_t ConstAddr = 0;

  Register PtrBase;
  int64_t Offset;
  std::tie(PtrBase, Offset) =
      getPtrBaseWithConstantOffset(Root.getReg(), *MRI);

  if (Offset) {
    int64_t OffsetValue0 = Offset;
    int64_t OffsetValue1 = Offset + Size;
    if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1, Size)) {
      // (add n0, c0)
      return std::pair(PtrBase, OffsetValue0 / Size);
    }
  } else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
    // TODO
  } else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
    // TODO
  }

  return std::pair(Root.getReg(), 0);
}
/// If \p Root is a G_PTR_ADD of a base pointer and a constant, return the
/// base register and the signed byte offset; otherwise return the original
/// register and 0.
std::pair<Register, int64_t>
AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
    Register Root, const MachineRegisterInfo &MRI) const {
  MachineInstr *RootI = getDefIgnoringCopies(Root, MRI);
  if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
    return {Root, 0};

  MachineOperand &RHS = RootI->getOperand(2);
  std::optional<ValueAndVReg> MaybeOffset =
      getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
  if (!MaybeOffset)
    return {Root, 0};
  return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
}
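// Typical use: callers fold "(ptr_add %base, G_CONSTANT c)" into an
// instruction-level immediate by taking {%base, c} from this helper and then
// validating c against the addressing mode in question (isDSOffsetLegal,
// isLegalFLATOffset, isLegalMUBUFImmOffset, and so on).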
// Return a resource descriptor for use with an arbitrary 64-bit pointer.
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                          uint32_t FormatLo, uint32_t FormatHi,
                          Register BasePtr) {
  Register RSrc2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrc3 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  Register RSrcHi = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
  Register RSrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);

  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc2)
      .addImm(FormatLo);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(RSrc3)
      .addImm(FormatHi);

  // Build the half of the subregister with the constants before building the
  // full 128-bit register. If we are building multiple resource descriptors,
  // this allows CSEing of the 2-component register.
  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrcHi)
      .addReg(RSrc2)
      .addImm(AMDGPU::sub0)
      .addReg(RSrc3)
      .addImm(AMDGPU::sub1);

  Register RSrcLo = BasePtr;
  if (!BasePtr) {
    RSrcLo = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
    B.buildInstr(AMDGPU::S_MOV_B64)
        .addDef(RSrcLo)
        .addImm(0);
  }

  B.buildInstr(AMDGPU::REG_SEQUENCE)
      .addDef(RSrc)
      .addReg(RSrcLo)
      .addImm(AMDGPU::sub0_sub1)
      .addReg(RSrcHi)
      .addImm(AMDGPU::sub2_sub3);

  return RSrc;
}

static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                                const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, 0, Hi_32(DefaultFormat), BasePtr);
}

static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const SIInstrInfo &TII, Register BasePtr) {
  uint64_t DefaultFormat = TII.getDefaultRsrcDataFormat();
  // FIXME: Why are half the "default" bits ignored based on the addressing
  // mode?
  return buildRSRC(B, MRI, -1, Hi_32(DefaultFormat), BasePtr);
}
AMDGPUInstructionSelector::MUBUFAddressData
AMDGPUInstructionSelector::parseMUBUFAddress(Register Src) const {
  MUBUFAddressData Data;
  Data.N0 = Src;

  Register PtrBase;
  int64_t Offset;

  std::tie(PtrBase, Offset) = getPtrBaseWithConstantOffset(Src, *MRI);
  if (isUInt<32>(Offset)) {
    Data.N0 = PtrBase;
    Data.Offset = Offset;
  }

  if (MachineInstr *InputAdd =
          getOpcodeDef(TargetOpcode::G_PTR_ADD, Data.N0, *MRI)) {
    Data.N2 = InputAdd->getOperand(1).getReg();
    Data.N3 = InputAdd->getOperand(2).getReg();
  }

  return Data;
}

/// Return whether a MUBUF access for this address must use the ADDR64 form,
/// i.e. the 64-bit pointer lives in VGPRs.
bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr) const {
  // (ptr_add N2, N3) -> addr64, or
  // (ptr_add (ptr_add N2, N3), C1) -> addr64
  if (Addr.N2)
    return true;

  const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
  return N0Bank->getID() == AMDGPU::VGPRRegBankID;
}

/// Split an immediate offset \p ImmOffset depending on whether it fits in
/// the MUBUF offset field, moving any overflow into SOffset.
void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
    MachineIRBuilder &B, Register &SOffset, int64_t &ImmOffset) const {
  if (TII.isLegalMUBUFImmOffset(ImmOffset))
    return;

  // Illegal offset, store it in soffset.
  SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
  B.buildInstr(AMDGPU::S_MOV_B32)
      .addDef(SOffset)
      .addImm(ImmOffset);
  ImmOffset = 0;
}
bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
    MachineOperand &Root, Register &VAddr, Register &RSrcReg,
    Register &SOffset, int64_t &Offset) const {
  // FIXME: Predicates should stop this from reaching here.
  // addr64 bit was removed for volcanic islands.
  if (!STI.hasAddr64() || STI.useFlatForGlobal())
    return false;

  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (!shouldUseAddr64(AddrData))
    return false;

  Register N0 = AddrData.N0;
  Register N2 = AddrData.N2;
  Register N3 = AddrData.N3;
  Offset = AddrData.Offset;

  // Base pointer for the SRD.
  Register SRDPtr;

  if (N2) {
    if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
      assert(N3);
      if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as
        // addr64, and construct the default resource from a 0 address.
        VAddr = N0;
      } else {
        SRDPtr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      SRDPtr = N2;
      VAddr = N3;
    }
  } else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
    // Use the default null pointer in the resource.
    VAddr = N0;
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    SRDPtr = N0;
  }

  MachineIRBuilder B(*Root.getParent());
  RSrcReg = buildAddr64RSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}

bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
    MachineOperand &Root, Register &RSrcReg, Register &SOffset,
    int64_t &Offset) const {
  MUBUFAddressData AddrData = parseMUBUFAddress(Root.getReg());
  if (shouldUseAddr64(AddrData))
    return false;

  // N0 -> offset, or
  // (N0 + C1) -> offset
  Register SRDPtr = AddrData.N0;
  Offset = AddrData.Offset;

  // TODO: Look through extensions for 32-bit soffset.
  MachineIRBuilder B(*Root.getParent());

  RSrcReg = buildOffsetSrc(B, *MRI, TII, SRDPtr);
  splitIllegalMUBUFOffset(B, SOffset, Offset);
  return true;
}
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFAddr64(MachineOperand &Root) const {
  Register VAddr;
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
    return {};

  // FIXME: Use defaulted operands for trailing 0s and remove from the
  // complex pattern.
  return {{
      [=](MachineInstrBuilder &MIB) { // rsrc
        MIB.addReg(RSrcReg);
      },
      [=](MachineInstrBuilder &MIB) { // vaddr
        MIB.addReg(VAddr);
      },
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { // offset
        MIB.addImm(Offset);
      },
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
  Register RSrcReg;
  Register SOffset;
  int64_t Offset = 0;

  if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { // rsrc
        MIB.addReg(RSrcReg);
      },
      [=](MachineInstrBuilder &MIB) { // soffset
        if (SOffset)
          MIB.addReg(SOffset);
        else if (STI.hasRestrictedSOffset())
          MIB.addReg(AMDGPU::SGPR_NULL);
        else
          MIB.addImm(0);
      },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
      addZeroImm, //  cpol
      addZeroImm, //  tfe
      addZeroImm  //  swz
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectBUFSOffset(MachineOperand &Root) const {
  Register SOffset = Root.getReg();

  if (STI.hasRestrictedSOffset() && mi_match(SOffset, *MRI, m_ZeroInt()))
    SOffset = AMDGPU::SGPR_NULL;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
}
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
  // getIConstantVRegVal sexts any values, so see if that matters.
  std::optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
  if (!OffsetVal || !isInt<32>(*OffsetVal))
    return std::nullopt;
  return Lo_32(*OffsetVal);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedOffset(STI, *OffsetVal, /*IsBuffer=*/true,
                                   /*HasSOffset=*/false);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferImm32(MachineOperand &Root) const {
  std::optional<uint64_t> OffsetVal =
      getConstantZext32Val(Root.getReg(), *MRI);
  if (!OffsetVal)
    return {};

  std::optional<int64_t> EncodedImm =
      AMDGPU::getSMRDEncodedLiteralOffset32(STI, *OffsetVal);
  if (!EncodedImm)
    return {};

  return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }}};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
    MachineOperand &Root) const {
  // Match the (soffset + offset) pair as a 32-bit register base and an
  // immediate offset.
  Register SOffset;
  unsigned Offset;
  std::tie(SOffset, Offset) = AMDGPU::getBaseWithConstantOffset(
      *MRI, Root.getReg(), KB, /*CheckNUW=*/true);
  if (!SOffset)
    return std::nullopt;

  std::optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(STI, Offset, /*IsBuffer=*/true,
                                   /*HasSOffset=*/true);
  if (!EncodedOffset)
    return std::nullopt;

  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedOffset); }}};
}
static MachineInstr *stripBitCast(MachineInstr *MI, MachineRegisterInfo &MRI) {
  if (MI->getOpcode() == AMDGPU::G_BITCAST)
    return getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
  return MI;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In,
                           Register &Out) {
  MachineInstr *Inst = stripBitCast(MRI.getVRegDef(In), MRI);
  if (Inst->getOpcode() != AMDGPU::G_TRUNC)
    return false;

  MachineInstr *TruncOp =
      stripBitCast(MRI.getVRegDef(Inst->getOperand(1).getReg()), MRI);

  // G_LSHR x, (G_CONSTANT i32 16)
  if (TruncOp->getOpcode() == AMDGPU::G_LSHR) {
    auto SrlAmount = getIConstantVRegValWithLookThrough(
        TruncOp->getOperand(2).getReg(), MRI);
    if (SrlAmount && SrlAmount->Value.getZExtValue() == 16) {
      Out = TruncOp->getOperand(1).getReg();
      return true;
    }
  }

  // G_SHUFFLE_VECTOR x, y, shufflemask(1, 1|0): either mask picks the high
  // half of x (1, 0 swaps the halves; 1, 1 duplicates the high half).
  if (TruncOp->getOpcode() == AMDGPU::G_SHUFFLE_VECTOR) {
    ArrayRef<int> Mask = TruncOp->getOperand(3).getShuffleMask();
    assert(Mask.size() == 2);

    if (Mask[0] == 1 && Mask[1] <= 1) {
      Out = TruncOp->getOperand(1).getReg();
      return true;
    }
  }

  return false;
}

std::pair<Register, unsigned>
AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
                                                     bool &Matched) const {
  Matched = false;

  Register Src;
  unsigned Mods;
  std::tie(Src, Mods) = selectVOP3ModsImpl(Root);

  MachineInstr *MI = MRI->getVRegDef(Src);
  if (MI->getOpcode() == AMDGPU::G_FPEXT) {
    MachineOperand *MO = &MI->getOperand(1);
    Src = MO->getReg();
    MI = MRI->getVRegDef(Src);

    // See through bitcasts.
    if (MI->getOpcode() == AMDGPU::G_BITCAST) {
      MO = &MI->getOperand(1);
      Src = MO->getReg();
      MI = MRI->getVRegDef(Src);
    }

    const auto CheckAbsNeg = [&]() {
      // Be careful about folding modifiers if we already have an abs. fneg
      // is applied last, so we don't want to apply an earlier fneg.
      if ((Mods & SISrcMods::ABS) == 0) {
        unsigned ModsTmp;
        std::tie(Src, ModsTmp) = selectVOP3ModsImpl(*MO);
        if (ModsTmp & SISrcMods::NEG)
          Mods ^= SISrcMods::NEG;
        if (ModsTmp & SISrcMods::ABS)
          Mods |= SISrcMods::ABS;
        MI = MRI->getVRegDef(Src);
        MO = &MI->getOperand(0);
      }
    };

    CheckAbsNeg();

    // op_sel/op_sel_hi decide the source type and source: op_sel_hi requests
    // a conversion from f16, and op_sel picks the high half of the source
    // register.
    Mods |= SISrcMods::OP_SEL_1;

    Register ExtractHiEltSrc;
    if (isExtractHiElt(*MRI, Src, ExtractHiEltSrc)) {
      Mods |= SISrcMods::OP_SEL_0;
      Src = ExtractHiEltSrc;
      MI = MRI->getVRegDef(Src);
      MO = &MI->getOperand(0);
      CheckAbsNeg();
    }

    Matched = true;
  }

  return std::pair(Src, Mods);
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
    MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
  if (!Matched)
    return {};

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
  Register Src;
  unsigned Mods;
  bool Matched;
  std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
  }};
}
bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  bool HasM0 = IntrID == Intrinsic::amdgcn_s_barrier_signal_isfirst_var;

  if (HasM0) {
    auto CopyMIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
                       .addReg(I.getOperand(2).getReg());
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0));
    if (!constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI))
      return false;
  } else {
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
        .addImm(I.getOperand(2).getImm());
  }

  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
  if (HasInlineConst) {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_IMM;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_IMM;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_IMM;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_IMM;
    };
  } else {
    switch (IntrID) {
    default:
      llvm_unreachable("not a named barrier op");
    case Intrinsic::amdgcn_s_barrier_init:
      return AMDGPU::S_BARRIER_INIT_M0;
    case Intrinsic::amdgcn_s_barrier_join:
      return AMDGPU::S_BARRIER_JOIN_M0;
    case Intrinsic::amdgcn_s_wakeup_barrier:
      return AMDGPU::S_WAKEUP_BARRIER_M0;
    case Intrinsic::amdgcn_s_get_barrier_state:
      return AMDGPU::S_GET_BARRIER_STATE_M0;
    };
  }
}
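// The _IMM forms take the barrier id as an inline immediate operand; the _M0
// forms read it (and, for s_barrier_init, the member count as well) from m0,
// which the caller must set up before emitting the barrier instruction.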
bool AMDGPUInstructionSelector::selectNamedBarrierInst(
    MachineInstr &I, Intrinsic::ID IntrID) const {
  MachineBasicBlock *MBB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_barrier_state
                             ? I.getOperand(2)
                             : I.getOperand(1);
  std::optional<int64_t> BarValImm =
      getIConstantVRegSExtVal(BarOp.getReg(), *MRI);
  Register M0Val;
  Register TmpReg0;

  // For S_BARRIER_INIT, the member count is always read from M0[16:22].
  if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
    Register MemberCount = I.getOperand(2).getReg();
    TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_LSHL_B32), TmpReg0)
        .addImm(16)
        .addReg(MemberCount);
    M0Val = TmpReg0;
  }

  // If the barrier id is not an inline constant, it has to go into M0,
  // combined with the shifted member count for s_barrier_init.
  if (!BarValImm) {
    if (IntrID == Intrinsic::amdgcn_s_barrier_init) {
      Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
      BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_OR_B32), TmpReg1)
          .addReg(BarOp.getReg())
          .addReg(TmpReg0);
      M0Val = TmpReg1;
    } else {
      M0Val = BarOp.getReg();
    }
    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(M0Val);
  }

  unsigned Opc = getNamedBarrierOp(BarValImm.has_value(), IntrID);
  auto MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));

  if (IntrID == Intrinsic::amdgcn_s_get_barrier_state)
    MIB.addDef(I.getOperand(0).getReg());

  if (BarValImm)
    MIB.addImm(*BarValImm);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectSBarrierLeave(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  const DebugLoc &DL = I.getDebugLoc();
  Register CCReg = I.getOperand(0).getReg();

  BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_BARRIER_LEAVE));
  BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg).addReg(AMDGPU::SCC);

  I.eraseFromParent();
  return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
                                      *MRI);
}
void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderNegateImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(-MI.getOperand(1).getCImm()->getSExtValue());
}

void AMDGPUInstructionSelector::renderBitcastImm(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  const MachineOperand &Op = MI.getOperand(1);
  if (MI.getOpcode() == TargetOpcode::G_FCONSTANT)
    MIB.addImm(Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
  else {
    assert(MI.getOpcode() == TargetOpcode::G_CONSTANT &&
           "Expected G_CONSTANT");
    MIB.addImm(Op.getCImm()->getSExtValue());
  }
}

void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}

/// This only really exists to satisfy the DAG type-checking machinery, so it
/// is a no-op here.
void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  MIB.addImm(MI.getOperand(OpIdx).getImm());
}

void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0
                                           : 0);
}

void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
                                                  const MachineInstr &MI,
                                                  int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  MIB.addImm(MI.getOperand(OpIdx).getImm() &
             (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                       : AMDGPU::CPol::ALL_pregfx12));
}

void AMDGPUInstructionSelector::renderExtractSWZ(MachineInstrBuilder &MIB,
                                                 const MachineInstr &MI,
                                                 int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const bool Swizzle = MI.getOperand(OpIdx).getImm() &
                       (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::SWZ
                                                 : AMDGPU::CPol::SWZ_pregfx12);
  MIB.addImm(Swizzle);
}

void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(OpIdx >= 0 && "expected to match an immediate operand");
  const uint32_t Cpol = MI.getOperand(OpIdx).getImm() &
                        (AMDGPU::isGFX12Plus(STI) ? AMDGPU::CPol::ALL
                                                  : AMDGPU::CPol::ALL_pregfx12);
  MIB.addImm(Cpol | AMDGPU::CPol::GLC);
}

void AMDGPUInstructionSelector::renderFPPow2ToExponent(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  const APFloat &APF = MI.getOperand(1).getFPImm()->getValueAPF();
  int ExpVal = APF.getExactLog2Abs();
  assert(ExpVal != INT_MIN);
  MIB.addImm(ExpVal);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APInt &Imm) const {
  return TII.isInlineConstant(Imm);
}

bool AMDGPUInstructionSelector::isInlineImmediate(const APFloat &Imm) const {
  return TII.isInlineConstant(Imm);
}
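// These render* hooks and isInlineImmediate predicates are invoked from the
// TableGen-generated matcher in AMDGPUGenGlobalISel.inc: they play the role
// SDNodeXForms play in SelectionDAG, mapping a matched G_CONSTANT or
// G_FCONSTANT into the exact immediate operand encoding the instruction
// expects.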