34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
158 if (
MI.isConvergent())
186 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
191 for (
auto Op :
MI.uses()) {
192 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
198 if (FromCycle ==
nullptr)
204 while (FromCycle && !FromCycle->
contains(ToCycle)) {
224 int64_t &Offset1)
const {
232 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
236 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
252 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
253 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
254 if (Offset0Idx == -1 || Offset1Idx == -1)
261 Offset0Idx -=
get(Opc0).NumDefs;
262 Offset1Idx -=
get(Opc1).NumDefs;
292 if (!Load0Offset || !Load1Offset)
309 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
310 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
312 if (OffIdx0 == -1 || OffIdx1 == -1)
318 OffIdx0 -=
get(Opc0).NumDefs;
319 OffIdx1 -=
get(Opc1).NumDefs;
338 case AMDGPU::DS_READ2ST64_B32:
339 case AMDGPU::DS_READ2ST64_B64:
340 case AMDGPU::DS_WRITE2ST64_B32:
341 case AMDGPU::DS_WRITE2ST64_B64:
356 OffsetIsScalable =
false;
373 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
375 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
376 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
389 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
390 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
391 if (Offset0 + 1 != Offset1)
402 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
410 Offset = EltSize * Offset0;
412 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
413 if (DataOpIdx == -1) {
414 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
416 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
432 if (BaseOp && !BaseOp->
isFI())
440 if (SOffset->
isReg())
446 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
448 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
457 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
458 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
460 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
461 if (VAddr0Idx >= 0) {
463 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
470 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
485 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
502 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
504 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
521 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
529 if (MO1->getAddrSpace() != MO2->getAddrSpace())
532 const auto *Base1 = MO1->getValue();
533 const auto *Base2 = MO2->getValue();
534 if (!Base1 || !Base2)
542 return Base1 == Base2;
546 int64_t Offset1,
bool OffsetIsScalable1,
548 int64_t Offset2,
bool OffsetIsScalable2,
549 unsigned ClusterSize,
550 unsigned NumBytes)
const {
563 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
582 const unsigned LoadSize = NumBytes / ClusterSize;
583 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
584 return NumDWords <= MaxMemoryClusterDWords;
598 int64_t Offset0, int64_t Offset1,
599 unsigned NumLoads)
const {
600 assert(Offset1 > Offset0 &&
601 "Second offset should be larger than first offset!");
606 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
613 const char *Msg =
"illegal VGPR to SGPR copy") {
634 assert((
TII.getSubtarget().hasMAIInsts() &&
635 !
TII.getSubtarget().hasGFX90AInsts()) &&
636 "Expected GFX908 subtarget.");
639 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
640 "Source register of the copy should be either an SGPR or an AGPR.");
643 "Destination register of the copy should be an AGPR.");
652 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
655 if (!Def->modifiesRegister(SrcReg, &RI))
658 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
659 Def->getOperand(0).getReg() != SrcReg)
666 bool SafeToPropagate =
true;
669 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
670 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
671 SafeToPropagate =
false;
673 if (!SafeToPropagate)
676 for (
auto I = Def;
I !=
MI; ++
I)
677 I->clearRegisterKills(DefOp.
getReg(), &RI);
686 if (ImpUseSuperReg) {
687 Builder.addReg(ImpUseSuperReg,
695 RS.enterBasicBlockEnd(
MBB);
696 RS.backward(std::next(
MI));
705 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
708 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
709 "VGPR used for an intermediate copy should have been reserved.");
714 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
724 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
725 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
726 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
733 if (ImpUseSuperReg) {
734 UseBuilder.
addReg(ImpUseSuperReg,
755 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
756 int16_t SubIdx = BaseIndices[Idx];
757 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
758 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
759 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
760 unsigned Opcode = AMDGPU::S_MOV_B32;
763 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
764 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
765 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
769 DestSubReg = RI.getSubReg(DestReg, SubIdx);
770 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
771 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
772 Opcode = AMDGPU::S_MOV_B64;
787 assert(FirstMI && LastMI);
795 LastMI->addRegisterKilled(SrcReg, &RI);
801 Register SrcReg,
bool KillSrc,
bool RenamableDest,
802 bool RenamableSrc)
const {
804 unsigned Size = RI.getRegSizeInBits(*RC);
806 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
812 if (((
Size == 16) != (SrcSize == 16))) {
814 assert(ST.useRealTrue16Insts());
816 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
819 if (DestReg == SrcReg) {
825 RC = RI.getPhysRegBaseClass(DestReg);
826 Size = RI.getRegSizeInBits(*RC);
827 SrcRC = RI.getPhysRegBaseClass(SrcReg);
828 SrcSize = RI.getRegSizeInBits(*SrcRC);
832 if (RC == &AMDGPU::VGPR_32RegClass) {
834 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
835 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
836 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
837 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
843 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
844 RC == &AMDGPU::SReg_32RegClass) {
845 if (SrcReg == AMDGPU::SCC) {
852 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
853 if (DestReg == AMDGPU::VCC_LO) {
871 if (RC == &AMDGPU::SReg_64RegClass) {
872 if (SrcReg == AMDGPU::SCC) {
879 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
880 if (DestReg == AMDGPU::VCC) {
898 if (DestReg == AMDGPU::SCC) {
901 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
905 assert(ST.hasScalarCompareEq64());
919 if (RC == &AMDGPU::AGPR_32RegClass) {
920 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
921 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
927 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
936 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
943 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
944 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
946 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
947 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
948 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
949 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
952 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
953 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
966 if (IsAGPRDst || IsAGPRSrc) {
967 if (!DstLow || !SrcLow) {
969 "Cannot use hi16 subreg with an AGPR!");
976 if (ST.useRealTrue16Insts()) {
982 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
983 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
995 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
996 if (!DstLow || !SrcLow) {
998 "Cannot use hi16 subreg on VI!");
1021 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1022 if (ST.hasVMovB64Inst()) {
1027 if (ST.hasPkMovB32()) {
1043 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1044 if (RI.isSGPRClass(RC)) {
1045 if (!RI.isSGPRClass(SrcRC)) {
1049 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1055 unsigned EltSize = 4;
1056 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1057 if (RI.isAGPRClass(RC)) {
1058 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1059 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1060 else if (RI.hasVGPRs(SrcRC) ||
1061 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1062 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1064 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1065 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1066 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1067 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1068 (RI.isProperlyAlignedRC(*RC) &&
1069 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1071 if (ST.hasVMovB64Inst()) {
1072 Opcode = AMDGPU::V_MOV_B64_e32;
1074 }
else if (ST.hasPkMovB32()) {
1075 Opcode = AMDGPU::V_PK_MOV_B32;
1085 std::unique_ptr<RegScavenger> RS;
1086 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1087 RS = std::make_unique<RegScavenger>();
1093 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1094 const bool CanKillSuperReg = KillSrc && !Overlap;
1096 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1099 SubIdx = SubIndices[Idx];
1101 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1102 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1103 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1104 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1106 bool IsFirstSubreg = Idx == 0;
1107 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1109 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1113 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1114 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1160 int64_t &ImmVal)
const {
1161 switch (
MI.getOpcode()) {
1162 case AMDGPU::V_MOV_B32_e32:
1163 case AMDGPU::S_MOV_B32:
1164 case AMDGPU::S_MOVK_I32:
1165 case AMDGPU::S_MOV_B64:
1166 case AMDGPU::V_MOV_B64_e32:
1167 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1168 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1169 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1170 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1171 case AMDGPU::V_MOV_B64_PSEUDO:
1172 case AMDGPU::V_MOV_B16_t16_e32: {
1176 return MI.getOperand(0).getReg() == Reg;
1181 case AMDGPU::V_MOV_B16_t16_e64: {
1183 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1185 return MI.getOperand(0).getReg() == Reg;
1190 case AMDGPU::S_BREV_B32:
1191 case AMDGPU::V_BFREV_B32_e32:
1192 case AMDGPU::V_BFREV_B32_e64: {
1196 return MI.getOperand(0).getReg() == Reg;
1201 case AMDGPU::S_NOT_B32:
1202 case AMDGPU::V_NOT_B32_e32:
1203 case AMDGPU::V_NOT_B32_e64: {
1206 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1207 return MI.getOperand(0).getReg() == Reg;
1217std::optional<int64_t>
1222 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1223 return std::nullopt;
1226 if (Def && Def->isMoveImmediate()) {
1232 return std::nullopt;
1237 if (RI.isAGPRClass(DstRC))
1238 return AMDGPU::COPY;
1239 if (RI.getRegSizeInBits(*DstRC) == 16) {
1242 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1244 if (RI.getRegSizeInBits(*DstRC) == 32)
1245 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1246 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1247 return AMDGPU::S_MOV_B64;
1248 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1249 return AMDGPU::V_MOV_B64_PSEUDO;
1250 return AMDGPU::COPY;
1255 bool IsIndirectSrc)
const {
1256 if (IsIndirectSrc) {
1258 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1260 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1262 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1264 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1266 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1268 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1270 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1272 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1274 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1276 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1278 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1280 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1282 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1283 if (VecSize <= 1024)
1284 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1290 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1292 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1294 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1296 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1298 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1300 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1302 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1304 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1306 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1308 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1310 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1312 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1314 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1315 if (VecSize <= 1024)
1316 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1323 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1325 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1327 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1329 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1331 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1333 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1335 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1337 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1339 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1341 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1343 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1345 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1347 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1348 if (VecSize <= 1024)
1349 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1356 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1358 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1360 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1362 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1364 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1366 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1368 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1370 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1372 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1374 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1376 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1378 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1380 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1381 if (VecSize <= 1024)
1382 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1389 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1391 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1393 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1395 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1396 if (VecSize <= 1024)
1397 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1404 bool IsSGPR)
const {
1416 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1423 return NeedsCFI ? AMDGPU::SI_SPILL_S32_CFI_SAVE : AMDGPU::SI_SPILL_S32_SAVE;
1425 return NeedsCFI ? AMDGPU::SI_SPILL_S64_CFI_SAVE : AMDGPU::SI_SPILL_S64_SAVE;
1427 return NeedsCFI ? AMDGPU::SI_SPILL_S96_CFI_SAVE : AMDGPU::SI_SPILL_S96_SAVE;
1429 return NeedsCFI ? AMDGPU::SI_SPILL_S128_CFI_SAVE
1430 : AMDGPU::SI_SPILL_S128_SAVE;
1432 return NeedsCFI ? AMDGPU::SI_SPILL_S160_CFI_SAVE
1433 : AMDGPU::SI_SPILL_S160_SAVE;
1435 return NeedsCFI ? AMDGPU::SI_SPILL_S192_CFI_SAVE
1436 : AMDGPU::SI_SPILL_S192_SAVE;
1438 return NeedsCFI ? AMDGPU::SI_SPILL_S224_CFI_SAVE
1439 : AMDGPU::SI_SPILL_S224_SAVE;
1441 return AMDGPU::SI_SPILL_S256_SAVE;
1443 return AMDGPU::SI_SPILL_S288_SAVE;
1445 return AMDGPU::SI_SPILL_S320_SAVE;
1447 return AMDGPU::SI_SPILL_S352_SAVE;
1449 return AMDGPU::SI_SPILL_S384_SAVE;
1451 return NeedsCFI ? AMDGPU::SI_SPILL_S512_CFI_SAVE
1452 : AMDGPU::SI_SPILL_S512_SAVE;
1454 return NeedsCFI ? AMDGPU::SI_SPILL_S1024_CFI_SAVE
1455 : AMDGPU::SI_SPILL_S1024_SAVE;
1464 return AMDGPU::SI_SPILL_V16_SAVE;
1466 return NeedsCFI ? AMDGPU::SI_SPILL_V32_CFI_SAVE : AMDGPU::SI_SPILL_V32_SAVE;
1468 return NeedsCFI ? AMDGPU::SI_SPILL_V64_CFI_SAVE : AMDGPU::SI_SPILL_V64_SAVE;
1470 return NeedsCFI ? AMDGPU::SI_SPILL_V96_CFI_SAVE : AMDGPU::SI_SPILL_V96_SAVE;
1472 return NeedsCFI ? AMDGPU::SI_SPILL_V128_CFI_SAVE
1473 : AMDGPU::SI_SPILL_V128_SAVE;
1475 return NeedsCFI ? AMDGPU::SI_SPILL_V160_CFI_SAVE
1476 : AMDGPU::SI_SPILL_V160_SAVE;
1478 return NeedsCFI ? AMDGPU::SI_SPILL_V192_CFI_SAVE
1479 : AMDGPU::SI_SPILL_V192_SAVE;
1481 return NeedsCFI ? AMDGPU::SI_SPILL_V224_CFI_SAVE
1482 : AMDGPU::SI_SPILL_V224_SAVE;
1484 return NeedsCFI ? AMDGPU::SI_SPILL_V256_CFI_SAVE
1485 : AMDGPU::SI_SPILL_V256_SAVE;
1487 return NeedsCFI ? AMDGPU::SI_SPILL_V288_CFI_SAVE
1488 : AMDGPU::SI_SPILL_V288_SAVE;
1490 return NeedsCFI ? AMDGPU::SI_SPILL_V320_CFI_SAVE
1491 : AMDGPU::SI_SPILL_V320_SAVE;
1493 return NeedsCFI ? AMDGPU::SI_SPILL_V352_CFI_SAVE
1494 : AMDGPU::SI_SPILL_V352_SAVE;
1496 return NeedsCFI ? AMDGPU::SI_SPILL_V384_CFI_SAVE
1497 : AMDGPU::SI_SPILL_V384_SAVE;
1499 return NeedsCFI ? AMDGPU::SI_SPILL_V512_CFI_SAVE
1500 : AMDGPU::SI_SPILL_V512_SAVE;
1502 return NeedsCFI ? AMDGPU::SI_SPILL_V1024_CFI_SAVE
1503 : AMDGPU::SI_SPILL_V1024_SAVE;
1512 return NeedsCFI ? AMDGPU::SI_SPILL_AV32_CFI_SAVE
1513 : AMDGPU::SI_SPILL_AV32_SAVE;
1515 return NeedsCFI ? AMDGPU::SI_SPILL_AV64_CFI_SAVE
1516 : AMDGPU::SI_SPILL_AV64_SAVE;
1518 return NeedsCFI ? AMDGPU::SI_SPILL_AV96_CFI_SAVE
1519 : AMDGPU::SI_SPILL_AV96_SAVE;
1521 return NeedsCFI ? AMDGPU::SI_SPILL_AV128_CFI_SAVE
1522 : AMDGPU::SI_SPILL_AV128_SAVE;
1524 return NeedsCFI ? AMDGPU::SI_SPILL_AV160_CFI_SAVE
1525 : AMDGPU::SI_SPILL_AV160_SAVE;
1527 return NeedsCFI ? AMDGPU::SI_SPILL_AV192_CFI_SAVE
1528 : AMDGPU::SI_SPILL_AV192_SAVE;
1530 return NeedsCFI ? AMDGPU::SI_SPILL_AV224_CFI_SAVE
1531 : AMDGPU::SI_SPILL_AV224_SAVE;
1533 return NeedsCFI ? AMDGPU::SI_SPILL_AV256_CFI_SAVE
1534 : AMDGPU::SI_SPILL_AV256_SAVE;
1536 return AMDGPU::SI_SPILL_AV288_SAVE;
1538 return AMDGPU::SI_SPILL_AV320_SAVE;
1540 return AMDGPU::SI_SPILL_AV352_SAVE;
1542 return AMDGPU::SI_SPILL_AV384_SAVE;
1544 return NeedsCFI ? AMDGPU::SI_SPILL_AV512_CFI_SAVE
1545 : AMDGPU::SI_SPILL_AV512_SAVE;
1547 return NeedsCFI ? AMDGPU::SI_SPILL_AV1024_CFI_SAVE
1548 : AMDGPU::SI_SPILL_AV1024_SAVE;
1555 bool IsVectorSuperClass) {
1560 if (IsVectorSuperClass)
1561 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1563 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1569 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1576 if (ST.hasMAIInsts())
1582void SIInstrInfo::storeRegToStackSlotImpl(
1595 FrameInfo.getObjectAlign(FrameIndex));
1596 unsigned SpillSize = RI.getSpillSize(*RC);
1602 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1603 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1604 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1613 if (SrcReg.
isVirtual() && SpillSize == 4) {
1627 SpillSize, *MFI, NeedsCFI);
1642 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC, VReg, Flags,
1651 storeRegToStackSlotImpl(
MBB,
MI, SrcReg, isKill, FrameIndex, RC,
Register(),
1658 return AMDGPU::SI_SPILL_S32_RESTORE;
1660 return AMDGPU::SI_SPILL_S64_RESTORE;
1662 return AMDGPU::SI_SPILL_S96_RESTORE;
1664 return AMDGPU::SI_SPILL_S128_RESTORE;
1666 return AMDGPU::SI_SPILL_S160_RESTORE;
1668 return AMDGPU::SI_SPILL_S192_RESTORE;
1670 return AMDGPU::SI_SPILL_S224_RESTORE;
1672 return AMDGPU::SI_SPILL_S256_RESTORE;
1674 return AMDGPU::SI_SPILL_S288_RESTORE;
1676 return AMDGPU::SI_SPILL_S320_RESTORE;
1678 return AMDGPU::SI_SPILL_S352_RESTORE;
1680 return AMDGPU::SI_SPILL_S384_RESTORE;
1682 return AMDGPU::SI_SPILL_S512_RESTORE;
1684 return AMDGPU::SI_SPILL_S1024_RESTORE;
1693 return AMDGPU::SI_SPILL_V16_RESTORE;
1695 return AMDGPU::SI_SPILL_V32_RESTORE;
1697 return AMDGPU::SI_SPILL_V64_RESTORE;
1699 return AMDGPU::SI_SPILL_V96_RESTORE;
1701 return AMDGPU::SI_SPILL_V128_RESTORE;
1703 return AMDGPU::SI_SPILL_V160_RESTORE;
1705 return AMDGPU::SI_SPILL_V192_RESTORE;
1707 return AMDGPU::SI_SPILL_V224_RESTORE;
1709 return AMDGPU::SI_SPILL_V256_RESTORE;
1711 return AMDGPU::SI_SPILL_V288_RESTORE;
1713 return AMDGPU::SI_SPILL_V320_RESTORE;
1715 return AMDGPU::SI_SPILL_V352_RESTORE;
1717 return AMDGPU::SI_SPILL_V384_RESTORE;
1719 return AMDGPU::SI_SPILL_V512_RESTORE;
1721 return AMDGPU::SI_SPILL_V1024_RESTORE;
1730 return AMDGPU::SI_SPILL_AV32_RESTORE;
1732 return AMDGPU::SI_SPILL_AV64_RESTORE;
1734 return AMDGPU::SI_SPILL_AV96_RESTORE;
1736 return AMDGPU::SI_SPILL_AV128_RESTORE;
1738 return AMDGPU::SI_SPILL_AV160_RESTORE;
1740 return AMDGPU::SI_SPILL_AV192_RESTORE;
1742 return AMDGPU::SI_SPILL_AV224_RESTORE;
1744 return AMDGPU::SI_SPILL_AV256_RESTORE;
1746 return AMDGPU::SI_SPILL_AV288_RESTORE;
1748 return AMDGPU::SI_SPILL_AV320_RESTORE;
1750 return AMDGPU::SI_SPILL_AV352_RESTORE;
1752 return AMDGPU::SI_SPILL_AV384_RESTORE;
1754 return AMDGPU::SI_SPILL_AV512_RESTORE;
1756 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1763 bool IsVectorSuperClass) {
1768 if (IsVectorSuperClass)
1769 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1771 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1777 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1784 if (ST.hasMAIInsts())
1787 assert(!RI.isAGPRClass(RC));
1801 unsigned SpillSize = RI.getSpillSize(*RC);
1808 FrameInfo.getObjectAlign(FrameIndex));
1810 if (RI.isSGPRClass(RC)) {
1813 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1814 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1815 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1820 if (DestReg.
isVirtual() && SpillSize == 4) {
1849 unsigned Quantity)
const {
1851 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1852 while (Quantity > 0) {
1853 unsigned Arg = std::min(Quantity, MaxSNopCount);
1864 constexpr unsigned DoorbellIDMask = 0x3ff;
1865 constexpr unsigned ECQueueWaveAbort = 0x400;
1870 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
1871 MBB.splitAt(
MI,
false);
1875 MBB.addSuccessor(TrapBB);
1885 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
1889 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
1894 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
1895 .
addUse(DoorbellRegMasked)
1896 .
addImm(ECQueueWaveAbort);
1897 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
1898 .
addUse(SetWaveAbortBit);
1901 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
1912 return MBB.getNextNode();
1916 switch (
MI.getOpcode()) {
1918 if (
MI.isMetaInstruction())
1923 return MI.getOperand(0).getImm() + 1;
1933 switch (
MI.getOpcode()) {
1935 case AMDGPU::S_MOV_B64_term:
1938 MI.setDesc(
get(AMDGPU::S_MOV_B64));
1941 case AMDGPU::S_MOV_B32_term:
1944 MI.setDesc(
get(AMDGPU::S_MOV_B32));
1947 case AMDGPU::S_XOR_B64_term:
1950 MI.setDesc(
get(AMDGPU::S_XOR_B64));
1953 case AMDGPU::S_XOR_B32_term:
1956 MI.setDesc(
get(AMDGPU::S_XOR_B32));
1958 case AMDGPU::S_OR_B64_term:
1961 MI.setDesc(
get(AMDGPU::S_OR_B64));
1963 case AMDGPU::S_OR_B32_term:
1966 MI.setDesc(
get(AMDGPU::S_OR_B32));
1969 case AMDGPU::S_ANDN2_B64_term:
1972 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
1975 case AMDGPU::S_ANDN2_B32_term:
1978 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
1981 case AMDGPU::S_AND_B64_term:
1984 MI.setDesc(
get(AMDGPU::S_AND_B64));
1987 case AMDGPU::S_AND_B32_term:
1990 MI.setDesc(
get(AMDGPU::S_AND_B32));
1993 case AMDGPU::S_AND_SAVEEXEC_B64_term:
1996 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
1999 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2002 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2005 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2006 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2009 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2010 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2012 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2016 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2019 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2022 int64_t Imm =
MI.getOperand(1).getImm();
2024 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2025 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2030 MI.eraseFromParent();
2036 case AMDGPU::V_MOV_B64_PSEUDO: {
2038 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2039 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2047 if (ST.hasVMovB64Inst() && Mov64RC->
contains(Dst)) {
2048 MI.setDesc(Mov64Desc);
2053 if (
SrcOp.isImm()) {
2055 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2056 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2080 if (ST.hasPkMovB32() &&
2099 MI.eraseFromParent();
2102 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2106 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2110 if (ST.has64BitLiterals()) {
2111 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2117 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2122 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2123 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2125 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2126 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2131 MI.eraseFromParent();
2134 case AMDGPU::V_SET_INACTIVE_B32: {
2138 .
add(
MI.getOperand(3))
2139 .
add(
MI.getOperand(4))
2140 .
add(
MI.getOperand(1))
2141 .
add(
MI.getOperand(2))
2142 .
add(
MI.getOperand(5));
2143 MI.eraseFromParent();
2146 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2147 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2148 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2149 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2150 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2151 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2152 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2153 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2154 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2155 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2156 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2157 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2158 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2159 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2160 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2161 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2162 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2163 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2164 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2165 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2166 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2167 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2168 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2169 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2170 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2171 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2172 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2173 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2174 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2175 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2176 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2177 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2178 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2182 if (RI.hasVGPRs(EltRC)) {
2183 Opc = AMDGPU::V_MOVRELD_B32_e32;
2185 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2186 : AMDGPU::S_MOVRELD_B32;
2191 bool IsUndef =
MI.getOperand(1).isUndef();
2192 unsigned SubReg =
MI.getOperand(3).getImm();
2193 assert(VecReg ==
MI.getOperand(1).getReg());
2198 .
add(
MI.getOperand(2))
2202 const int ImpDefIdx =
2204 const int ImpUseIdx = ImpDefIdx + 1;
2206 MI.eraseFromParent();
2209 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2210 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2211 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2212 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2213 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2214 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2215 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2216 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2217 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2218 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2219 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2220 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2221 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2222 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2223 assert(ST.useVGPRIndexMode());
2225 bool IsUndef =
MI.getOperand(1).isUndef();
2234 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2238 .
add(
MI.getOperand(2))
2242 const int ImpDefIdx =
2244 const int ImpUseIdx = ImpDefIdx + 1;
2251 MI.eraseFromParent();
2254 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2255 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2256 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2257 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2258 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2259 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2260 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2261 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2262 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2263 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2264 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2265 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2266 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2267 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2268 assert(ST.useVGPRIndexMode());
2271 bool IsUndef =
MI.getOperand(1).isUndef();
2275 .
add(
MI.getOperand(2))
2288 MI.eraseFromParent();
2291 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2294 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2295 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2314 if (ST.hasGetPCZeroExtension()) {
2318 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2325 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2335 MI.eraseFromParent();
2338 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2348 Op.setOffset(
Op.getOffset() + 4);
2350 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2354 MI.eraseFromParent();
2357 case AMDGPU::ENTER_STRICT_WWM: {
2363 case AMDGPU::ENTER_STRICT_WQM: {
2370 MI.eraseFromParent();
2373 case AMDGPU::EXIT_STRICT_WWM:
2374 case AMDGPU::EXIT_STRICT_WQM: {
2380 case AMDGPU::SI_RETURN: {
2394 MI.eraseFromParent();
2398 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2399 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2400 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2403 case AMDGPU::S_GETPC_B64_pseudo:
2404 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2405 if (ST.hasGetPCZeroExtension()) {
2407 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2416 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2417 assert(ST.hasBF16PackedInsts());
2418 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2429 case AMDGPU::GET_STACK_BASE:
2432 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2439 Register DestReg =
MI.getOperand(0).getReg();
2449 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2450 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2451 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2454 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2457 MI.getNumExplicitOperands());
2475 case AMDGPU::S_MOV_B64:
2476 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2485 if (UsedLanes.
all())
2490 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2491 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2493 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2494 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2496 if (NeedLo && NeedHi)
2500 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2502 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2511 case AMDGPU::S_LOAD_DWORDX16_IMM:
2512 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2525 for (
auto &CandMO :
I->operands()) {
2526 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2534 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2538 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2544 unsigned NewOpcode = -1;
2545 if (SubregSize == 256)
2546 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2547 else if (SubregSize == 128)
2548 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2558 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2563 MI->getOperand(0).setReg(DestReg);
2564 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2568 OffsetMO->
setImm(FinalOffset);
2574 MI->setMemRefs(*MF, NewMMOs);
2587std::pair<MachineInstr*, MachineInstr*>
2589 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2591 if (ST.hasVMovB64Inst() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2594 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2595 return std::pair(&
MI,
nullptr);
2606 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2608 if (Dst.isPhysical()) {
2609 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2616 for (
unsigned I = 1;
I <= 2; ++
I) {
2619 if (
SrcOp.isImm()) {
2621 Imm.ashrInPlace(Part * 32);
2622 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2626 if (Src.isPhysical())
2627 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2634 MovDPP.addImm(MO.getImm());
2636 Split[Part] = MovDPP;
2640 if (Dst.isVirtual())
2647 MI.eraseFromParent();
2648 return std::pair(Split[0], Split[1]);
2651std::optional<DestSourcePair>
2653 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2656 return std::nullopt;
2660 AMDGPU::OpName Src0OpName,
2662 AMDGPU::OpName Src1OpName)
const {
2669 "All commutable instructions have both src0 and src1 modifiers");
2671 int Src0ModsVal = Src0Mods->
getImm();
2672 int Src1ModsVal = Src1Mods->
getImm();
2674 Src1Mods->
setImm(Src0ModsVal);
2675 Src0Mods->
setImm(Src1ModsVal);
2684 bool IsKill = RegOp.
isKill();
2686 bool IsUndef = RegOp.
isUndef();
2687 bool IsDebug = RegOp.
isDebug();
2689 if (NonRegOp.
isImm())
2691 else if (NonRegOp.
isFI())
2712 int64_t NonRegVal = NonRegOp1.
getImm();
2715 NonRegOp2.
setImm(NonRegVal);
2722 unsigned OpIdx1)
const {
2727 unsigned Opc =
MI.getOpcode();
2728 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2738 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2741 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2746 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2752 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2767 unsigned Src1Idx)
const {
2768 assert(!NewMI &&
"this should never be used");
2770 unsigned Opc =
MI.getOpcode();
2772 if (CommutedOpcode == -1)
2775 if (Src0Idx > Src1Idx)
2778 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2779 static_cast<int>(Src0Idx) &&
2780 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2781 static_cast<int>(Src1Idx) &&
2782 "inconsistency with findCommutedOpIndices");
2807 Src1, AMDGPU::OpName::src1_modifiers);
2810 AMDGPU::OpName::src1_sel);
2822 unsigned &SrcOpIdx0,
2823 unsigned &SrcOpIdx1)
const {
2828 unsigned &SrcOpIdx0,
2829 unsigned &SrcOpIdx1)
const {
2830 if (!
Desc.isCommutable())
2833 unsigned Opc =
Desc.getOpcode();
2834 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2838 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2842 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2846 int64_t BrOffset)
const {
2863 return MI.getOperand(0).getMBB();
2868 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2869 MI.getOpcode() == AMDGPU::SI_LOOP)
2881 "new block should be inserted for expanding unconditional branch");
2884 "restore block should be inserted for restoring clobbered registers");
2892 if (ST.useAddPC64Inst()) {
2894 MCCtx.createTempSymbol(
"offset",
true);
2898 MCCtx.createTempSymbol(
"post_addpc",
true);
2899 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
2903 Offset->setVariableValue(OffsetExpr);
2907 assert(RS &&
"RegScavenger required for long branching");
2915 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
2916 ST.hasVALUReadSGPRHazard();
2917 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
2918 if (FlushSGPRWrites)
2926 ApplyHazardWorkarounds();
2929 MCCtx.createTempSymbol(
"post_getpc",
true);
2933 MCCtx.createTempSymbol(
"offset_lo",
true);
2935 MCCtx.createTempSymbol(
"offset_hi",
true);
2938 .
addReg(PCReg, {}, AMDGPU::sub0)
2942 .
addReg(PCReg, {}, AMDGPU::sub1)
2944 ApplyHazardWorkarounds();
2985 if (LongBranchReservedReg) {
2986 RS->enterBasicBlock(
MBB);
2987 Scav = LongBranchReservedReg;
2989 RS->enterBasicBlockEnd(
MBB);
2990 Scav = RS->scavengeRegisterBackwards(
2995 RS->setRegUsed(Scav);
3003 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3020unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3022 case SIInstrInfo::SCC_TRUE:
3023 return AMDGPU::S_CBRANCH_SCC1;
3024 case SIInstrInfo::SCC_FALSE:
3025 return AMDGPU::S_CBRANCH_SCC0;
3026 case SIInstrInfo::VCCNZ:
3027 return AMDGPU::S_CBRANCH_VCCNZ;
3028 case SIInstrInfo::VCCZ:
3029 return AMDGPU::S_CBRANCH_VCCZ;
3030 case SIInstrInfo::EXECNZ:
3031 return AMDGPU::S_CBRANCH_EXECNZ;
3032 case SIInstrInfo::EXECZ:
3033 return AMDGPU::S_CBRANCH_EXECZ;
3039SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3041 case AMDGPU::S_CBRANCH_SCC0:
3043 case AMDGPU::S_CBRANCH_SCC1:
3045 case AMDGPU::S_CBRANCH_VCCNZ:
3047 case AMDGPU::S_CBRANCH_VCCZ:
3049 case AMDGPU::S_CBRANCH_EXECNZ:
3051 case AMDGPU::S_CBRANCH_EXECZ:
3063 bool AllowModify)
const {
3064 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3066 TBB =
I->getOperand(0).getMBB();
3070 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3071 if (Pred == INVALID_BR)
3076 Cond.push_back(
I->getOperand(1));
3080 if (
I ==
MBB.end()) {
3086 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3088 FBB =
I->getOperand(0).getMBB();
3098 bool AllowModify)
const {
3106 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3107 switch (
I->getOpcode()) {
3108 case AMDGPU::S_MOV_B64_term:
3109 case AMDGPU::S_XOR_B64_term:
3110 case AMDGPU::S_OR_B64_term:
3111 case AMDGPU::S_ANDN2_B64_term:
3112 case AMDGPU::S_AND_B64_term:
3113 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3114 case AMDGPU::S_MOV_B32_term:
3115 case AMDGPU::S_XOR_B32_term:
3116 case AMDGPU::S_OR_B32_term:
3117 case AMDGPU::S_ANDN2_B32_term:
3118 case AMDGPU::S_AND_B32_term:
3119 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3122 case AMDGPU::SI_ELSE:
3123 case AMDGPU::SI_KILL_I1_TERMINATOR:
3124 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3141 int *BytesRemoved)
const {
3143 unsigned RemovedSize = 0;
3146 if (
MI.isBranch() ||
MI.isReturn()) {
3148 MI.eraseFromParent();
3154 *BytesRemoved = RemovedSize;
3171 int *BytesAdded)
const {
3172 if (!FBB &&
Cond.empty()) {
3176 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3183 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3195 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3213 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3220 if (
Cond.size() != 2) {
3224 if (
Cond[0].isImm()) {
3235 Register FalseReg,
int &CondCycles,
3236 int &TrueCycles,
int &FalseCycles)
const {
3246 CondCycles = TrueCycles = FalseCycles = NumInsts;
3249 return RI.hasVGPRs(RC) && NumInsts <= 6;
3263 if (NumInsts % 2 == 0)
3266 CondCycles = TrueCycles = FalseCycles = NumInsts;
3267 return RI.isSGPRClass(RC);
3278 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3279 if (Pred == VCCZ || Pred == SCC_FALSE) {
3280 Pred =
static_cast<BranchPredicate
>(-Pred);
3286 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3288 if (DstSize == 32) {
3290 if (Pred == SCC_TRUE) {
3305 if (DstSize == 64 && Pred == SCC_TRUE) {
3315 static const int16_t Sub0_15[] = {
3316 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3317 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3318 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3319 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3322 static const int16_t Sub0_15_64[] = {
3323 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3324 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3325 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3326 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3329 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3331 const int16_t *SubIndices = Sub0_15;
3332 int NElts = DstSize / 32;
3336 if (Pred == SCC_TRUE) {
3338 SelOp = AMDGPU::S_CSELECT_B32;
3339 EltRC = &AMDGPU::SGPR_32RegClass;
3341 SelOp = AMDGPU::S_CSELECT_B64;
3342 EltRC = &AMDGPU::SGPR_64RegClass;
3343 SubIndices = Sub0_15_64;
3349 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3354 for (
int Idx = 0; Idx != NElts; ++Idx) {
3358 unsigned SubIdx = SubIndices[Idx];
3361 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3363 .
addReg(FalseReg, {}, SubIdx)
3364 .addReg(TrueReg, {}, SubIdx);
3367 .
addReg(TrueReg, {}, SubIdx)
3368 .addReg(FalseReg, {}, SubIdx);
3381 if (
MI.isBranch() ||
MI.isCall() ||
MI.isReturn() ||
MI.isIndirectBranch())
3384 switch (
MI.getOpcode()) {
3385 case AMDGPU::S_ENDPGM:
3386 case AMDGPU::S_ENDPGM_SAVED:
3387 case AMDGPU::S_TRAP:
3388 case AMDGPU::S_GETREG_B32:
3389 case AMDGPU::S_SETREG_B32:
3390 case AMDGPU::S_SETREG_B32_mode:
3391 case AMDGPU::S_SETREG_IMM32_B32:
3392 case AMDGPU::S_SETREG_IMM32_B32_mode:
3393 case AMDGPU::S_SENDMSG:
3394 case AMDGPU::S_SENDMSGHALT:
3395 case AMDGPU::S_SENDMSG_RTN_B32:
3396 case AMDGPU::S_SENDMSG_RTN_B64:
3397 case AMDGPU::S_BARRIER_WAIT:
3398 case AMDGPU::S_BARRIER_SIGNAL_M0:
3399 case AMDGPU::S_BARRIER_SIGNAL_IMM:
3400 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
3401 case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
3409 switch (
MI.getOpcode()) {
3410 case AMDGPU::V_MOV_B16_t16_e32:
3411 case AMDGPU::V_MOV_B16_t16_e64:
3412 case AMDGPU::V_MOV_B32_e32:
3413 case AMDGPU::V_MOV_B32_e64:
3414 case AMDGPU::V_MOV_B64_PSEUDO:
3415 case AMDGPU::V_MOV_B64_e32:
3416 case AMDGPU::V_MOV_B64_e64:
3417 case AMDGPU::S_MOV_B32:
3418 case AMDGPU::S_MOV_B64:
3419 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3421 case AMDGPU::WWM_COPY:
3422 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3423 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3424 case AMDGPU::V_ACCVGPR_MOV_B32:
3425 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3426 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3434 switch (
MI.getOpcode()) {
3435 case AMDGPU::V_MOV_B16_t16_e32:
3436 case AMDGPU::V_MOV_B16_t16_e64:
3438 case AMDGPU::V_MOV_B32_e32:
3439 case AMDGPU::V_MOV_B32_e64:
3440 case AMDGPU::V_MOV_B64_PSEUDO:
3441 case AMDGPU::V_MOV_B64_e32:
3442 case AMDGPU::V_MOV_B64_e64:
3443 case AMDGPU::S_MOV_B32:
3444 case AMDGPU::S_MOV_B64:
3445 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3447 case AMDGPU::WWM_COPY:
3448 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3449 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3450 case AMDGPU::V_ACCVGPR_MOV_B32:
3451 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3452 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3460 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3461 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3462 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3465 unsigned Opc =
MI.getOpcode();
3467 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3469 MI.removeOperand(Idx);
3475 MI.setDesc(NewDesc);
3481 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3482 Desc.implicit_defs().size();
3484 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3485 MI.removeOperand(
I);
3489 unsigned SubRegIndex) {
3490 switch (SubRegIndex) {
3491 case AMDGPU::NoSubRegister:
3501 case AMDGPU::sub1_lo16:
3503 case AMDGPU::sub1_hi16:
3506 return std::nullopt;
3514 case AMDGPU::V_MAC_F16_e32:
3515 case AMDGPU::V_MAC_F16_e64:
3516 case AMDGPU::V_MAD_F16_e64:
3517 return AMDGPU::V_MADAK_F16;
3518 case AMDGPU::V_MAC_F32_e32:
3519 case AMDGPU::V_MAC_F32_e64:
3520 case AMDGPU::V_MAD_F32_e64:
3521 return AMDGPU::V_MADAK_F32;
3522 case AMDGPU::V_FMAC_F32_e32:
3523 case AMDGPU::V_FMAC_F32_e64:
3524 case AMDGPU::V_FMA_F32_e64:
3525 return AMDGPU::V_FMAAK_F32;
3526 case AMDGPU::V_FMAC_F16_e32:
3527 case AMDGPU::V_FMAC_F16_e64:
3528 case AMDGPU::V_FMAC_F16_t16_e64:
3529 case AMDGPU::V_FMAC_F16_fake16_e64:
3530 case AMDGPU::V_FMAC_F16_t16_e32:
3531 case AMDGPU::V_FMAC_F16_fake16_e32:
3532 case AMDGPU::V_FMA_F16_e64:
3533 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3534 ? AMDGPU::V_FMAAK_F16_t16
3535 : AMDGPU::V_FMAAK_F16_fake16
3536 : AMDGPU::V_FMAAK_F16;
3537 case AMDGPU::V_FMAC_F64_e32:
3538 case AMDGPU::V_FMAC_F64_e64:
3539 case AMDGPU::V_FMA_F64_e64:
3540 return AMDGPU::V_FMAAK_F64;
3548 case AMDGPU::V_MAC_F16_e32:
3549 case AMDGPU::V_MAC_F16_e64:
3550 case AMDGPU::V_MAD_F16_e64:
3551 return AMDGPU::V_MADMK_F16;
3552 case AMDGPU::V_MAC_F32_e32:
3553 case AMDGPU::V_MAC_F32_e64:
3554 case AMDGPU::V_MAD_F32_e64:
3555 return AMDGPU::V_MADMK_F32;
3556 case AMDGPU::V_FMAC_F32_e32:
3557 case AMDGPU::V_FMAC_F32_e64:
3558 case AMDGPU::V_FMA_F32_e64:
3559 return AMDGPU::V_FMAMK_F32;
3560 case AMDGPU::V_FMAC_F16_e32:
3561 case AMDGPU::V_FMAC_F16_e64:
3562 case AMDGPU::V_FMAC_F16_t16_e64:
3563 case AMDGPU::V_FMAC_F16_fake16_e64:
3564 case AMDGPU::V_FMAC_F16_t16_e32:
3565 case AMDGPU::V_FMAC_F16_fake16_e32:
3566 case AMDGPU::V_FMA_F16_e64:
3567 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3568 ? AMDGPU::V_FMAMK_F16_t16
3569 : AMDGPU::V_FMAMK_F16_fake16
3570 : AMDGPU::V_FMAMK_F16;
3571 case AMDGPU::V_FMAC_F64_e32:
3572 case AMDGPU::V_FMAC_F64_e64:
3573 case AMDGPU::V_FMA_F64_e64:
3574 return AMDGPU::V_FMAMK_F64;
3588 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3591 if (
Opc == AMDGPU::COPY) {
3592 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3599 if (HasMultipleUses) {
3602 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3605 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3613 if (ImmDefSize == 32 &&
3618 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3619 RI.getSubRegIdxSize(UseSubReg) == 16;
3622 if (RI.hasVGPRs(DstRC))
3625 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3631 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3638 for (
unsigned MovOp :
3639 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3640 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3648 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3652 if (MovDstPhysReg) {
3656 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3663 if (MovDstPhysReg) {
3664 if (!MovDstRC->
contains(MovDstPhysReg))
3680 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3688 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3692 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3694 UseMI.getOperand(0).setReg(MovDstPhysReg);
3699 UseMI.setDesc(NewMCID);
3700 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3701 UseMI.addImplicitDefUseOperands(*MF);
3705 if (HasMultipleUses)
3708 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3709 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3710 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3711 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3712 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3713 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3714 Opc == AMDGPU::V_FMAC_F64_e64) {
3723 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3734 auto CopyRegOperandToNarrowerRC =
3737 if (!
MI.getOperand(OpNo).isReg())
3741 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3744 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3745 get(AMDGPU::COPY), Tmp)
3747 MI.getOperand(OpNo).setReg(Tmp);
3748 MI.getOperand(OpNo).setIsKill();
3755 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3756 if (!RegSrc->
isReg())
3759 ST.getConstantBusLimit(
Opc) < 2)
3774 if (Def && Def->isMoveImmediate() &&
3789 unsigned SrcSubReg = RegSrc->
getSubReg();
3794 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3795 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3796 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3797 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3798 UseMI.untieRegOperand(
3799 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3806 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3807 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3811 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3812 UseMI.getOperand(0).getReg())
3814 UseMI.getOperand(0).setReg(Tmp);
3815 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3816 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3821 DefMI.eraseFromParent();
3828 if (ST.getConstantBusLimit(
Opc) < 2) {
3831 bool Src0Inlined =
false;
3832 if (Src0->
isReg()) {
3837 if (Def && Def->isMoveImmediate() &&
3842 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3843 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3849 if (Src1->
isReg() && !Src0Inlined) {
3852 if (Def && Def->isMoveImmediate() &&
3856 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3869 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3870 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3871 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3872 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3873 UseMI.untieRegOperand(
3874 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3876 const std::optional<int64_t> SubRegImm =
3886 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3887 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
3891 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3892 UseMI.getOperand(0).getReg())
3894 UseMI.getOperand(0).setReg(Tmp);
3895 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3896 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
3906 DefMI.eraseFromParent();
3918 if (BaseOps1.
size() != BaseOps2.
size())
3920 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
3921 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
3929 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
3930 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
3931 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
3933 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
3936bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
3939 int64_t Offset0, Offset1;
3942 bool Offset0IsScalable, Offset1IsScalable;
3956 LocationSize Width0 = MIa.
memoperands().front()->getSize();
3957 LocationSize Width1 = MIb.
memoperands().front()->getSize();
3964 "MIa must load from or modify a memory location");
3966 "MIb must load from or modify a memory location");
3988 return checkInstOffsetsDoNotOverlap(MIa, MIb);
3995 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4005 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4019 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4030 if (
Reg.isPhysical())
4034 Imm = Def->getOperand(1).getImm();
4054 unsigned NumOps =
MI.getNumOperands();
4057 if (
Op.isReg() &&
Op.isKill())
4065 case AMDGPU::V_MAC_F16_e32:
4066 case AMDGPU::V_MAC_F16_e64:
4067 return AMDGPU::V_MAD_F16_e64;
4068 case AMDGPU::V_MAC_F32_e32:
4069 case AMDGPU::V_MAC_F32_e64:
4070 return AMDGPU::V_MAD_F32_e64;
4071 case AMDGPU::V_MAC_LEGACY_F32_e32:
4072 case AMDGPU::V_MAC_LEGACY_F32_e64:
4073 return AMDGPU::V_MAD_LEGACY_F32_e64;
4074 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4075 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4076 return AMDGPU::V_FMA_LEGACY_F32_e64;
4077 case AMDGPU::V_FMAC_F16_e32:
4078 case AMDGPU::V_FMAC_F16_e64:
4079 case AMDGPU::V_FMAC_F16_t16_e64:
4080 case AMDGPU::V_FMAC_F16_fake16_e64:
4081 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4082 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4083 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4084 : AMDGPU::V_FMA_F16_gfx9_e64;
4085 case AMDGPU::V_FMAC_F32_e32:
4086 case AMDGPU::V_FMAC_F32_e64:
4087 return AMDGPU::V_FMA_F32_e64;
4088 case AMDGPU::V_FMAC_F64_e32:
4089 case AMDGPU::V_FMAC_F64_e64:
4090 return AMDGPU::V_FMA_F64_e64;
4110 if (
MI.isBundle()) {
4113 if (
MI.getBundleSize() != 1)
4115 CandidateMI =
MI.getNextNode();
4119 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4123 if (
MI.isBundle()) {
4128 MI.untieRegOperand(MO.getOperandNo());
4136 if (Def.isEarlyClobber() && Def.isReg() &&
4141 auto UpdateDefIndex = [&](
LiveRange &LR) {
4142 auto *S = LR.find(OldIndex);
4143 if (S != LR.end() && S->start == OldIndex) {
4144 assert(S->valno && S->valno->def == OldIndex);
4145 S->start = NewIndex;
4146 S->valno->def = NewIndex;
4150 for (
auto &SR : LI.subranges())
4156 if (U.RemoveMIUse) {
4159 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4163 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4164 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4165 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4166 U.RemoveMIUse->removeOperand(
I);
4171 if (
MI.isBundle()) {
4175 if (MO.isReg() && MO.getReg() == DefReg) {
4176 assert(MO.getSubReg() == 0 &&
4177 "tied sub-registers in bundles currently not supported");
4178 MI.removeOperand(MO.getOperandNo());
4195 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4196 MIOp.setIsUndef(
true);
4197 MIOp.setReg(DummyReg);
4201 if (
MI.isBundle()) {
4205 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4206 MIOp.setIsUndef(
true);
4207 MIOp.setReg(DummyReg);
4220 return MI.isBundle() ? &
MI : NewMI;
4225 ThreeAddressUpdates &U)
const {
4227 unsigned Opc =
MI.getOpcode();
4231 if (NewMFMAOpc != -1) {
4234 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4235 MIB.
add(
MI.getOperand(
I));
4243 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4248 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4249 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4250 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4254 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4255 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4256 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4257 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4258 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4259 bool Src0Literal =
false;
4264 case AMDGPU::V_MAC_F16_e64:
4265 case AMDGPU::V_FMAC_F16_e64:
4266 case AMDGPU::V_FMAC_F16_t16_e64:
4267 case AMDGPU::V_FMAC_F16_fake16_e64:
4268 case AMDGPU::V_MAC_F32_e64:
4269 case AMDGPU::V_MAC_LEGACY_F32_e64:
4270 case AMDGPU::V_FMAC_F32_e64:
4271 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4272 case AMDGPU::V_FMAC_F64_e64:
4274 case AMDGPU::V_MAC_F16_e32:
4275 case AMDGPU::V_FMAC_F16_e32:
4276 case AMDGPU::V_MAC_F32_e32:
4277 case AMDGPU::V_MAC_LEGACY_F32_e32:
4278 case AMDGPU::V_FMAC_F32_e32:
4279 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4280 case AMDGPU::V_FMAC_F64_e32: {
4281 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4282 AMDGPU::OpName::src0);
4283 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4294 MachineInstrBuilder MIB;
4297 const MachineOperand *Src0Mods =
4300 const MachineOperand *Src1Mods =
4303 const MachineOperand *Src2Mods =
4309 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4310 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4312 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4314 MachineInstr *
DefMI;
4350 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4366 if (Src0Literal && !ST.hasVOP3Literal())
4394 switch (
MI.getOpcode()) {
4395 case AMDGPU::S_SET_GPR_IDX_ON:
4396 case AMDGPU::S_SET_GPR_IDX_MODE:
4397 case AMDGPU::S_SET_GPR_IDX_OFF:
4415 if (
MI.isTerminator() ||
MI.isPosition())
4419 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4422 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4428 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4429 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4430 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4431 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4432 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4437 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4438 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4439 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4453 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4458 if (
MI.memoperands_empty())
4463 unsigned AS = Memop->getAddrSpace();
4464 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4465 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4466 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4467 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4482 if (
MI.memoperands_empty())
4491 unsigned AS = Memop->getAddrSpace();
4508 if (ST.isTgSplitEnabled())
4513 if (
MI.memoperands_empty())
4518 unsigned AS = Memop->getAddrSpace();
4534 unsigned Opcode =
MI.getOpcode();
4549 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4550 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4551 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4552 Opcode == AMDGPU::S_SETHALT)
4555 if (
MI.isCall() ||
MI.isInlineAsm())
4571 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4572 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4573 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4574 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4582 if (
MI.isMetaInstruction())
4586 if (
MI.isCopyLike()) {
4587 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4591 return MI.readsRegister(AMDGPU::EXEC, &RI);
4602 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4606 switch (Imm.getBitWidth()) {
4612 ST.hasInv2PiInlineImm());
4615 ST.hasInv2PiInlineImm());
4617 return ST.has16BitInsts() &&
4619 ST.hasInv2PiInlineImm());
4626 APInt IntImm = Imm.bitcastToAPInt();
4628 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4636 return ST.has16BitInsts() &&
4639 return ST.has16BitInsts() &&
4649 switch (OperandType) {
4659 int32_t Trunc =
static_cast<int32_t
>(Imm);
4703 int16_t Trunc =
static_cast<int16_t
>(Imm);
4704 return ST.has16BitInsts() &&
4713 int16_t Trunc =
static_cast<int16_t
>(Imm);
4714 return ST.has16BitInsts() &&
4765 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4771 return ST.hasVOP3Literal();
4775 int64_t ImmVal)
const {
4778 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4779 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4780 AMDGPU::OpName::src2))
4782 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4794 "unexpected imm-like operand kind");
4807 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4825 AMDGPU::OpName
OpName)
const {
4827 return Mods && Mods->
getImm();
4840 switch (
MI.getOpcode()) {
4841 default:
return false;
4843 case AMDGPU::V_ADDC_U32_e64:
4844 case AMDGPU::V_SUBB_U32_e64:
4845 case AMDGPU::V_SUBBREV_U32_e64: {
4848 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4853 case AMDGPU::V_MAC_F16_e64:
4854 case AMDGPU::V_MAC_F32_e64:
4855 case AMDGPU::V_MAC_LEGACY_F32_e64:
4856 case AMDGPU::V_FMAC_F16_e64:
4857 case AMDGPU::V_FMAC_F16_t16_e64:
4858 case AMDGPU::V_FMAC_F16_fake16_e64:
4859 case AMDGPU::V_FMAC_F32_e64:
4860 case AMDGPU::V_FMAC_F64_e64:
4861 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4862 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4867 case AMDGPU::V_CNDMASK_B32_e64:
4873 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
4903 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4912 unsigned Op32)
const {
4926 Inst32.
add(
MI.getOperand(
I));
4930 int Idx =
MI.getNumExplicitDefs();
4932 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
4937 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4959 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
4967 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
4970 return AMDGPU::SReg_32RegClass.contains(Reg) ||
4971 AMDGPU::SReg_64RegClass.contains(Reg);
4999 switch (MO.getReg()) {
5001 case AMDGPU::VCC_LO:
5002 case AMDGPU::VCC_HI:
5004 case AMDGPU::FLAT_SCR:
5017 switch (
MI.getOpcode()) {
5018 case AMDGPU::V_READLANE_B32:
5019 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5020 case AMDGPU::V_WRITELANE_B32:
5021 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5028 if (
MI.isPreISelOpcode() ||
5029 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5047 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5058 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5059 ErrInfo =
"illegal copy from vector register to SGPR";
5077 if (!MRI.
isSSA() &&
MI.isCopy())
5078 return verifyCopy(
MI, MRI, ErrInfo);
5080 if (SIInstrInfo::isGenericOpcode(Opcode))
5083 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5084 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5085 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5087 if (Src0Idx == -1) {
5089 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5090 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5091 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5092 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5097 if (!
Desc.isVariadic() &&
5098 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5099 ErrInfo =
"Instruction has wrong number of operands.";
5103 if (
MI.isInlineAsm()) {
5116 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5117 ErrInfo =
"inlineasm operand has incorrect register class.";
5125 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5126 ErrInfo =
"missing memory operand from image instruction.";
5131 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5134 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5135 "all fp values to integers.";
5140 int16_t RegClass = getOpRegClassID(OpInfo);
5142 switch (OpInfo.OperandType) {
5144 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5145 ErrInfo =
"Illegal immediate value for operand.";
5178 ErrInfo =
"Illegal immediate value for operand.";
5187 if (ST.has64BitLiterals() &&
Desc.getSize() != 4 && MO.
isImm() &&
5190 OpInfo.OperandType ==
5192 ErrInfo =
"illegal 64-bit immediate value for operand.";
5199 ErrInfo =
"Expected inline constant for operand.";
5213 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5214 ErrInfo =
"Expected immediate, but got non-immediate";
5223 if (OpInfo.isGenericType())
5238 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5239 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5241 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5243 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5244 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5251 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5252 ErrInfo =
"Subtarget requires even aligned vector registers";
5257 if (RegClass != -1) {
5258 if (Reg.isVirtual())
5263 ErrInfo =
"Operand has incorrect register class.";
5271 if (!ST.hasSDWA()) {
5272 ErrInfo =
"SDWA is not supported on this target";
5276 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5277 AMDGPU::OpName::dst_sel}) {
5281 int64_t Imm = MO->
getImm();
5283 ErrInfo =
"Invalid SDWA selection";
5288 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5290 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5295 if (!ST.hasSDWAScalar()) {
5297 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5298 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5305 "Only reg allowed as operands in SDWA instructions on GFX9+";
5311 if (!ST.hasSDWAOmod()) {
5314 if (OMod !=
nullptr &&
5316 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5321 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5322 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5323 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5324 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5327 unsigned Mods = Src0ModsMO->
getImm();
5330 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5336 if (
isVOPC(BasicOpcode)) {
5337 if (!ST.hasSDWASdst() && DstIdx != -1) {
5340 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5341 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5344 }
else if (!ST.hasSDWAOutModsVOPC()) {
5347 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5348 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5354 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5355 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5362 if (DstUnused && DstUnused->isImm() &&
5365 if (!Dst.isReg() || !Dst.isTied()) {
5366 ErrInfo =
"Dst register should have tied register";
5371 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5374 "Dst register should be tied to implicit use of preserved register";
5378 ErrInfo =
"Dst register should use same physical register as preserved";
5384 if (
isDPP(
MI) && !ST.hasDPPSrc1SGPR() && Src1Idx != -1) {
5386 if (Src1MO.
isReg() && RI.isSGPRReg(MRI, Src1MO.
getReg())) {
5387 ErrInfo =
"DPP src1 cannot be SGPR on this subtarget";
5393 if (
isImage(Opcode) && !
MI.mayStore()) {
5405 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5413 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5417 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5418 if (RegCount > DstSize) {
5419 ErrInfo =
"Image instruction returns too many registers for dst "
5429 Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5430 unsigned ConstantBusCount = 0;
5431 bool UsesLiteral =
false;
5434 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5438 LiteralVal = &
MI.getOperand(ImmIdx);
5447 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5458 }
else if (!MO.
isFI()) {
5465 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5475 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5476 return !RI.regsOverlap(SGPRUsed, SGPR);
5485 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5486 Opcode != AMDGPU::V_WRITELANE_B32) {
5487 ErrInfo =
"VOP* instruction violates constant bus restriction";
5491 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5492 ErrInfo =
"VOP3 instruction uses literal";
5499 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5500 unsigned SGPRCount = 0;
5503 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5511 if (MO.
getReg() != SGPRUsed)
5516 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5517 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5524 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5525 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5532 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5542 ErrInfo =
"ABS not allowed in VOP3B instructions";
5555 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5562 if (
Desc.isBranch()) {
5564 ErrInfo =
"invalid branch target for SOPK instruction";
5571 ErrInfo =
"invalid immediate for SOPK instruction";
5576 ErrInfo =
"invalid immediate for SOPK instruction";
5583 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5584 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5585 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5586 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5587 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5588 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5590 const unsigned StaticNumOps =
5591 Desc.getNumOperands() +
Desc.implicit_uses().size();
5592 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5598 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5599 ErrInfo =
"missing implicit register operands";
5605 if (!Dst->isUse()) {
5606 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5611 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5612 UseOpIdx != StaticNumOps + 1) {
5613 ErrInfo =
"movrel implicit operands should be tied";
5620 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5622 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5623 ErrInfo =
"src0 should be subreg of implicit vector use";
5631 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5632 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5638 if (
MI.mayStore() &&
5643 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5644 ErrInfo =
"scalar stores must use m0 as offset register";
5650 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5652 if (
Offset->getImm() != 0) {
5653 ErrInfo =
"subtarget does not support offsets in flat instructions";
5658 if (
isDS(
MI) && !ST.hasGDS()) {
5660 if (GDSOp && GDSOp->
getImm() != 0) {
5661 ErrInfo =
"GDS is not supported on this subtarget";
5669 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5670 AMDGPU::OpName::vaddr0);
5671 AMDGPU::OpName RSrcOpName =
5672 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5673 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5681 ErrInfo =
"dim is out of range";
5686 if (ST.hasR128A16()) {
5688 IsA16 = R128A16->
getImm() != 0;
5689 }
else if (ST.hasA16()) {
5691 IsA16 = A16->
getImm() != 0;
5694 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5696 unsigned AddrWords =
5699 unsigned VAddrWords;
5701 VAddrWords = RsrcIdx - VAddr0Idx;
5702 if (ST.hasPartialNSAEncoding() &&
5704 unsigned LastVAddrIdx = RsrcIdx - 1;
5705 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5713 if (VAddrWords != AddrWords) {
5715 <<
" but got " << VAddrWords <<
"\n");
5716 ErrInfo =
"bad vaddr size";
5726 unsigned DC = DppCt->
getImm();
5727 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5728 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5729 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5730 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5731 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5732 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5733 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5734 ErrInfo =
"Invalid dpp_ctrl value";
5737 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5738 !ST.hasDPPWavefrontShifts()) {
5739 ErrInfo =
"Invalid dpp_ctrl value: "
5740 "wavefront shifts are not supported on GFX10+";
5743 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5744 !ST.hasDPPBroadcasts()) {
5745 ErrInfo =
"Invalid dpp_ctrl value: "
5746 "broadcasts are not supported on GFX10+";
5749 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5751 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5752 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5753 !ST.hasGFX90AInsts()) {
5754 ErrInfo =
"Invalid dpp_ctrl value: "
5755 "row_newbroadcast/row_share is not supported before "
5759 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5760 ErrInfo =
"Invalid dpp_ctrl value: "
5761 "row_share and row_xmask are not supported before GFX10";
5766 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5769 ErrInfo =
"Invalid dpp_ctrl value: "
5770 "DP ALU dpp only support row_newbcast";
5777 AMDGPU::OpName DataName =
5778 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5784 if (ST.hasGFX90AInsts()) {
5785 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5786 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5787 ErrInfo =
"Invalid register class: "
5788 "vdata and vdst should be both VGPR or AGPR";
5791 if (
Data && Data2 &&
5792 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5793 ErrInfo =
"Invalid register class: "
5794 "both data operands should be VGPR or AGPR";
5798 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5799 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5800 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5801 ErrInfo =
"Invalid register class: "
5802 "agpr loads and stores not supported on this GPU";
5808 if (ST.needsAlignedVGPRs()) {
5809 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5814 if (Reg.isPhysical())
5815 return !(RI.getHWRegIndex(Reg) & 1);
5817 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5818 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5821 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5822 Opcode == AMDGPU::DS_GWS_BARRIER) {
5824 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5825 ErrInfo =
"Subtarget requires even aligned vector registers "
5826 "for DS_GWS instructions";
5832 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5833 ErrInfo =
"Subtarget requires even aligned vector registers "
5834 "for vaddr operand of image instructions";
5840 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5842 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5843 ErrInfo =
"Invalid register class: "
5844 "v_accvgpr_write with an SGPR is not supported on this GPU";
5849 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5852 ErrInfo =
"pseudo expects only physical SGPRs";
5859 if (!ST.hasScaleOffset()) {
5860 ErrInfo =
"Subtarget does not support offset scaling";
5864 ErrInfo =
"Instruction does not support offset scaling";
5873 for (
unsigned I = 0;
I < 3; ++
I) {
5879 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5880 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5882 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5883 &AMDGPU::SReg_64RegClass) ||
5884 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5885 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5894 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5896 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5898 : AMDGPU::V_MOV_B32_e32;
5908 default:
return AMDGPU::INSTRUCTION_LIST_END;
5909 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5910 case AMDGPU::COPY:
return AMDGPU::COPY;
5911 case AMDGPU::PHI:
return AMDGPU::PHI;
5912 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5913 case AMDGPU::WQM:
return AMDGPU::WQM;
5914 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5915 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5916 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5917 case AMDGPU::S_ADD_I32:
5918 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5919 case AMDGPU::S_ADDC_U32:
5920 return AMDGPU::V_ADDC_U32_e32;
5921 case AMDGPU::S_SUB_I32:
5922 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5925 case AMDGPU::S_ADD_U32:
5926 return AMDGPU::V_ADD_CO_U32_e32;
5927 case AMDGPU::S_SUB_U32:
5928 return AMDGPU::V_SUB_CO_U32_e32;
5929 case AMDGPU::S_ADD_U64_PSEUDO:
5930 return AMDGPU::V_ADD_U64_PSEUDO;
5931 case AMDGPU::S_SUB_U64_PSEUDO:
5932 return AMDGPU::V_SUB_U64_PSEUDO;
5933 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5934 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5935 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5936 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5937 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5938 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5939 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5940 case AMDGPU::S_XNOR_B32:
5941 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5942 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5943 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5944 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5945 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5946 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5947 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5948 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5949 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5950 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5951 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5952 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5953 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5954 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5955 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5956 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5957 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5958 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5959 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5960 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5961 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5962 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5963 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5964 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5965 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5966 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5967 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5968 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5969 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5970 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5971 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5972 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5973 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5974 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5975 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5976 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5977 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5978 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5979 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5980 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5981 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5982 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5983 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5984 case AMDGPU::S_CVT_F32_F16:
5985 case AMDGPU::S_CVT_HI_F32_F16:
5986 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5987 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5988 case AMDGPU::S_CVT_F16_F32:
5989 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
5990 : AMDGPU::V_CVT_F16_F32_fake16_e64;
5991 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
5992 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
5993 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
5994 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
5995 case AMDGPU::S_CEIL_F16:
5996 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
5997 : AMDGPU::V_CEIL_F16_fake16_e64;
5998 case AMDGPU::S_FLOOR_F16:
5999 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6000 : AMDGPU::V_FLOOR_F16_fake16_e64;
6001 case AMDGPU::S_TRUNC_F16:
6002 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6003 : AMDGPU::V_TRUNC_F16_fake16_e64;
6004 case AMDGPU::S_RNDNE_F16:
6005 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6006 : AMDGPU::V_RNDNE_F16_fake16_e64;
6007 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6008 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6009 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6010 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6011 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6012 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6013 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6014 case AMDGPU::S_ADD_F16:
6015 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6016 : AMDGPU::V_ADD_F16_fake16_e64;
6017 case AMDGPU::S_SUB_F16:
6018 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6019 : AMDGPU::V_SUB_F16_fake16_e64;
6020 case AMDGPU::S_MIN_F16:
6021 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6022 : AMDGPU::V_MIN_F16_fake16_e64;
6023 case AMDGPU::S_MAX_F16:
6024 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6025 : AMDGPU::V_MAX_F16_fake16_e64;
6026 case AMDGPU::S_MINIMUM_F16:
6027 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6028 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6029 case AMDGPU::S_MAXIMUM_F16:
6030 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6031 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6032 case AMDGPU::S_MUL_F16:
6033 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6034 : AMDGPU::V_MUL_F16_fake16_e64;
6035 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6036 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6037 case AMDGPU::S_FMAC_F16:
6038 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6039 : AMDGPU::V_FMAC_F16_fake16_e64;
6040 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6041 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6042 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6043 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6044 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6045 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6046 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6047 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6048 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6049 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6050 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6051 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6052 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6053 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6054 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6055 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6056 case AMDGPU::S_CMP_LT_F16:
6057 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6058 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6059 case AMDGPU::S_CMP_EQ_F16:
6060 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6061 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6062 case AMDGPU::S_CMP_LE_F16:
6063 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6064 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6065 case AMDGPU::S_CMP_GT_F16:
6066 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6067 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6068 case AMDGPU::S_CMP_LG_F16:
6069 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6070 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6071 case AMDGPU::S_CMP_GE_F16:
6072 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6073 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6074 case AMDGPU::S_CMP_O_F16:
6075 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6076 : AMDGPU::V_CMP_O_F16_fake16_e64;
6077 case AMDGPU::S_CMP_U_F16:
6078 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6079 : AMDGPU::V_CMP_U_F16_fake16_e64;
6080 case AMDGPU::S_CMP_NGE_F16:
6081 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6082 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6083 case AMDGPU::S_CMP_NLG_F16:
6084 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6085 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6086 case AMDGPU::S_CMP_NGT_F16:
6087 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6088 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6089 case AMDGPU::S_CMP_NLE_F16:
6090 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6091 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6092 case AMDGPU::S_CMP_NEQ_F16:
6093 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6094 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6095 case AMDGPU::S_CMP_NLT_F16:
6096 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6097 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6098 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6099 case AMDGPU::V_S_EXP_F16_e64:
6100 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6101 : AMDGPU::V_EXP_F16_fake16_e64;
6102 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6103 case AMDGPU::V_S_LOG_F16_e64:
6104 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6105 : AMDGPU::V_LOG_F16_fake16_e64;
6106 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6107 case AMDGPU::V_S_RCP_F16_e64:
6108 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6109 : AMDGPU::V_RCP_F16_fake16_e64;
6110 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6111 case AMDGPU::V_S_RSQ_F16_e64:
6112 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6113 : AMDGPU::V_RSQ_F16_fake16_e64;
6114 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6115 case AMDGPU::V_S_SQRT_F16_e64:
6116 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6117 : AMDGPU::V_SQRT_F16_fake16_e64;
6120 "Unexpected scalar opcode without corresponding vector one!");
6169 "Not a whole wave func");
6172 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6173 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6180 unsigned OpNo)
const {
6182 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6183 Desc.operands()[OpNo].RegClass == -1) {
6186 if (Reg.isVirtual()) {
6190 return RI.getPhysRegBaseClass(Reg);
6193 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6194 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6202 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6204 unsigned Size = RI.getRegSizeInBits(*RC);
6205 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6206 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6207 : AMDGPU::V_MOV_B32_e32;
6209 Opcode = AMDGPU::COPY;
6210 else if (RI.isSGPRClass(RC))
6211 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6225 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6231 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6242 if (SubIdx == AMDGPU::sub0)
6244 if (SubIdx == AMDGPU::sub1)
6256void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6272 if (Reg.isPhysical())
6282 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6285 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6292 unsigned Opc =
MI.getOpcode();
6298 constexpr AMDGPU::OpName OpNames[] = {
6299 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6302 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6303 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6313 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6314 if (IsAGPR && !ST.hasMAIInsts())
6320 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6321 const int DataIdx = AMDGPU::getNamedOperandIdx(
6322 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6323 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6324 MI.getOperand(DataIdx).isReg() &&
6325 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6327 if ((
int)
OpIdx == DataIdx) {
6328 if (VDstIdx != -1 &&
6329 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6332 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6333 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6334 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6339 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6340 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6341 RI.isSGPRReg(MRI, MO.
getReg()))
6344 if (ST.hasFlatScratchHiInB64InstHazard() &&
6351 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6354 if (!ST.hasDPPSrc1SGPR() &&
isDPP(
MI) && RI.isSGPRReg(MRI, MO.
getReg()) &&
6355 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1))
6375 constexpr unsigned NumOps = 3;
6376 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6377 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6378 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6379 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6384 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6387 MO = &
MI.getOperand(SrcIdx);
6390 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6394 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6398 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6402 return !OpSel && !OpSelHi;
6411 int64_t RegClass = getOpRegClassID(OpInfo);
6413 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6419 if (
isVALU(
MI,
true) && !IsInlineConst &&
6423 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6424 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6428 if (!LiteralLimit--)
6438 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6446 if (--ConstantBusLimit <= 0)
6458 if (!LiteralLimit--)
6460 if (--ConstantBusLimit <= 0)
6466 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6470 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6472 !
Op.isIdenticalTo(*MO))
6482 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6497 bool Is64BitOp = Is64BitFPOp ||
6505 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6514 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6532 bool IsGFX950Only = ST.hasGFX950Insts();
6533 bool IsGFX940Only = ST.hasGFX940Insts();
6535 if (!IsGFX950Only && !IsGFX940Only)
6553 unsigned Opcode =
MI.getOpcode();
6555 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6556 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6557 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6558 case AMDGPU::V_MQSAD_U32_U8_e64:
6559 case AMDGPU::V_PK_ADD_F16:
6560 case AMDGPU::V_PK_ADD_F32:
6561 case AMDGPU::V_PK_ADD_I16:
6562 case AMDGPU::V_PK_ADD_U16:
6563 case AMDGPU::V_PK_ASHRREV_I16:
6564 case AMDGPU::V_PK_FMA_F16:
6565 case AMDGPU::V_PK_FMA_F32:
6566 case AMDGPU::V_PK_FMAC_F16_e32:
6567 case AMDGPU::V_PK_FMAC_F16_e64:
6568 case AMDGPU::V_PK_LSHLREV_B16:
6569 case AMDGPU::V_PK_LSHRREV_B16:
6570 case AMDGPU::V_PK_MAD_I16:
6571 case AMDGPU::V_PK_MAD_U16:
6572 case AMDGPU::V_PK_MAX_F16:
6573 case AMDGPU::V_PK_MAX_I16:
6574 case AMDGPU::V_PK_MAX_U16:
6575 case AMDGPU::V_PK_MIN_F16:
6576 case AMDGPU::V_PK_MIN_I16:
6577 case AMDGPU::V_PK_MIN_U16:
6578 case AMDGPU::V_PK_MOV_B32:
6579 case AMDGPU::V_PK_MUL_F16:
6580 case AMDGPU::V_PK_MUL_F32:
6581 case AMDGPU::V_PK_MUL_LO_U16:
6582 case AMDGPU::V_PK_SUB_I16:
6583 case AMDGPU::V_PK_SUB_U16:
6584 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6593 unsigned Opc =
MI.getOpcode();
6596 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6599 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6605 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6606 RI.isSGPRReg(MRI, Src0.
getReg()))
6612 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6614 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6620 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6631 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6632 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6633 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6645 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6646 RI.isVGPR(MRI, Src1.
getReg())) {
6659 if (HasImplicitSGPR || !
MI.isCommutable()) {
6676 if (CommutedOpc == -1) {
6681 MI.setDesc(
get(CommutedOpc));
6685 bool Src0Kill = Src0.
isKill();
6689 else if (Src1.
isReg()) {
6704 unsigned Opc =
MI.getOpcode();
6707 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6708 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6709 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6712 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6713 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6714 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6715 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6716 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6717 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6718 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6728 if (VOP3Idx[2] != -1) {
6740 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6741 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6743 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6745 SGPRsUsed.
insert(SGPRReg);
6749 for (
int Idx : VOP3Idx) {
6758 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6770 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6777 if (ConstantBusLimit > 0) {
6789 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6790 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6797 for (
unsigned I = 0;
I < 3; ++
I) {
6810 SRC = RI.getCommonSubClass(SRC, DstRC);
6813 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6815 if (RI.hasAGPRs(VRC)) {
6816 VRC = RI.getEquivalentVGPRClass(VRC);
6819 get(TargetOpcode::COPY), NewSrcReg)
6826 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6832 for (
unsigned i = 0; i < SubRegs; ++i) {
6835 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6836 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6842 get(AMDGPU::REG_SEQUENCE), DstReg);
6843 for (
unsigned i = 0; i < SubRegs; ++i) {
6845 MIB.
addImm(RI.getSubRegFromChannel(i));
6858 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6860 SBase->setReg(SGPR);
6863 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6871 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6872 if (OldSAddrIdx < 0)
6885 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6888 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6889 if (NewVAddrIdx < 0)
6892 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6896 if (OldVAddrIdx >= 0) {
6910 if (OldVAddrIdx == NewVAddrIdx) {
6921 assert(OldSAddrIdx == NewVAddrIdx);
6923 if (OldVAddrIdx >= 0) {
6924 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6925 AMDGPU::OpName::vdst_in);
6929 if (NewVDstIn != -1) {
6930 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6936 if (NewVDstIn != -1) {
6937 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6978 unsigned OpSubReg =
Op.getSubReg();
6981 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
6997 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7000 bool ImpDef = Def->isImplicitDef();
7001 while (!ImpDef && Def && Def->isCopy()) {
7002 if (Def->getOperand(1).getReg().isPhysical())
7005 ImpDef = Def && Def->isImplicitDef();
7007 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7023 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7027 for (
auto [Idx, ScalarOp] :
enumerate(ScalarOps)) {
7028 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7029 unsigned NumSubRegs =
RegSize / 32;
7030 Register VScalarOp = ScalarOp->getReg();
7033 TII.getRegClass(
TII.get(AMDGPU::V_READFIRSTLANE_B32), 1);
7035 if (NumSubRegs == 1) {
7038 TRI->getCommonSubClass(VScalarOpRC, RFLSrcRC);
7039 Common != VScalarOpRC) {
7046 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7051 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7057 CondReg = NewCondReg;
7067 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7068 ScalarOp->setReg(CurReg);
7071 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7072 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7074 ScalarOp->setReg(PhySGPRs[Idx]);
7076 ScalarOp->setIsKill();
7080 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7081 "Unhandled register size");
7083 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7090 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7091 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7094 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7095 .
addReg(VScalarOp, VScalarOpUndef,
7096 TRI->getSubRegFromChannel(Idx + 1));
7103 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7113 if (NumSubRegs <= 2)
7114 Cmp.addReg(VScalarOp);
7116 Cmp.addReg(VScalarOp, VScalarOpUndef,
7117 TRI->getSubRegFromChannel(Idx, 2));
7121 CondReg = NewCondReg;
7131 const auto *SScalarOpRC =
7137 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7138 unsigned Channel = 0;
7139 for (
Register Piece : ReadlanePieces) {
7140 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7144 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7145 ScalarOp->setReg(SScalarOp);
7147 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7148 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7150 ScalarOp->setReg(PhySGPRs[Idx]);
7152 ScalarOp->setIsKill();
7184 assert((PhySGPRs.empty() || PhySGPRs.size() == ScalarOps.
size()) &&
7185 "Physical SGPRs must be empty or match the number of scalar operands");
7191 if (!Begin.isValid())
7193 if (!End.isValid()) {
7199 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7208 std::numeric_limits<unsigned>::max()) !=
7226 for (
auto I = Begin;
I != AfterMI;
I++) {
7227 for (
auto &MO :
I->all_uses())
7263 for (
auto &Succ : RemainderBB->
successors()) {
7288static std::tuple<unsigned, unsigned>
7296 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7297 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7304 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7321 .
addImm(AMDGPU::sub0_sub1)
7327 return std::tuple(RsrcPtr, NewSRsrc);
7364 if (
MI.getOpcode() == AMDGPU::PHI) {
7366 assert(!RI.isSGPRClass(VRC));
7369 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7371 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7387 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7390 if (RI.hasVGPRs(DstRC)) {
7394 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7396 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7414 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7419 if (DstRC != Src0RC) {
7428 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7430 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7436 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7437 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7438 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7439 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7440 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7441 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7442 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7444 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7457 ? AMDGPU::OpName::rsrc
7458 : AMDGPU::OpName::srsrc;
7463 AMDGPU::OpName SampOpName =
7464 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7473 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7481 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7485 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7495 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7496 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7497 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7498 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7500 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7507 bool isSoffsetLegal =
true;
7509 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7510 if (SoffsetIdx != -1) {
7514 isSoffsetLegal =
false;
7518 bool isRsrcLegal =
true;
7520 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7521 if (RsrcIdx != -1) {
7523 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7524 isRsrcLegal =
false;
7528 if (isRsrcLegal && isSoffsetLegal)
7556 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7560 unsigned RsrcPtr, NewSRsrc;
7567 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7568 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7574 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7575 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7588 }
else if (!VAddr && ST.hasAddr64()) {
7592 "FIXME: Need to emit flat atomics here");
7594 unsigned RsrcPtr, NewSRsrc;
7620 MIB.
addImm(CPol->getImm());
7625 MIB.
addImm(TFE->getImm());
7645 MI.removeFromParent();
7650 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7651 .addImm(AMDGPU::sub0)
7652 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7653 .addImm(AMDGPU::sub1);
7656 if (!isSoffsetLegal) {
7667 if (!isSoffsetLegal) {
7679 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7680 if (RsrcIdx != -1) {
7681 DeferredList.insert(
MI);
7686 return DeferredList.contains(
MI);
7696 if (!ST.useRealTrue16Insts())
7699 unsigned Opcode =
MI.getOpcode();
7703 OpIdx >=
get(Opcode).getNumOperands() ||
7704 get(Opcode).operands()[
OpIdx].RegClass == -1)
7708 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7712 if (!RI.isVGPRClass(CurrRC))
7715 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7717 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7718 Op.setSubReg(AMDGPU::lo16);
7719 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7729 Op.setReg(NewDstReg);
7742 assert(
MI->getOpcode() == AMDGPU::SI_CALL_ISEL &&
7743 "This only handle waterfall for SI_CALL_ISEL");
7750 while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
7753 while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
7758 while (End !=
MBB.end() && End->isCopy() &&
7759 MI->definesRegister(End->getOperand(1).getReg(), &RI))
7769 while (!Worklist.
empty()) {
7775 moveToVALUImpl(Worklist, MDT, Inst, WaterFalls, V2SPhyCopiesToErase);
7781 moveToVALUImpl(Worklist, MDT, *Inst, WaterFalls, V2SPhyCopiesToErase);
7783 "Deferred MachineInstr are not supposed to re-populate worklist");
7786 for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : WaterFalls) {
7787 if (Entry.first->getOpcode() == AMDGPU::SI_CALL_ISEL)
7789 Entry.second.SGPRs);
7792 for (std::pair<MachineInstr *, bool> Entry : V2SPhyCopiesToErase)
7794 Entry.first->eraseFromParent();
7802 if (SubRegIndices.
size() <= 1) {
7805 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7812 for (int16_t Indice : SubRegIndices) {
7815 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7822 get(AMDGPU::REG_SEQUENCE), DstReg);
7823 for (
unsigned i = 0; i < SubRegIndices.size(); ++i) {
7825 MIB.
addImm(RI.getSubRegFromChannel(i));
7835 if (DstReg == AMDGPU::M0) {
7848 if (
I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
7850 for (
unsigned i = 0; i <
UseMI->getNumOperands(); ++i) {
7851 if (
UseMI->getOperand(i).isReg() &&
7852 UseMI->getOperand(i).getReg() == DstReg) {
7856 V2SCopyInfo.MOs.push_back(MO);
7857 V2SCopyInfo.SGPRs.push_back(DstReg);
7861 }
else if (
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
7862 I->getOperand(0).isReg() &&
7863 I->getOperand(0).getReg() == DstReg) {
7866 }
else if (
I->readsRegister(DstReg, &RI)) {
7868 V2SPhyCopiesToErase[&Inst] =
false;
7870 if (
I->findRegisterDefOperand(DstReg, &RI))
7892 case AMDGPU::S_ADD_I32:
7893 case AMDGPU::S_SUB_I32: {
7897 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7905 case AMDGPU::S_MUL_U64:
7906 if (ST.hasVMulU64Inst()) {
7907 NewOpcode = AMDGPU::V_MUL_U64_e64;
7911 splitScalarSMulU64(Worklist, Inst, MDT);
7915 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7916 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7919 splitScalarSMulPseudo(Worklist, Inst, MDT);
7923 case AMDGPU::S_AND_B64:
7924 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7928 case AMDGPU::S_OR_B64:
7929 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7933 case AMDGPU::S_XOR_B64:
7934 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7938 case AMDGPU::S_NAND_B64:
7939 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7943 case AMDGPU::S_NOR_B64:
7944 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7948 case AMDGPU::S_XNOR_B64:
7949 if (ST.hasDLInsts())
7950 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7952 splitScalar64BitXnor(Worklist, Inst, MDT);
7956 case AMDGPU::S_ANDN2_B64:
7957 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7961 case AMDGPU::S_ORN2_B64:
7962 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7966 case AMDGPU::S_BREV_B64:
7967 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7971 case AMDGPU::S_NOT_B64:
7972 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7976 case AMDGPU::S_BCNT1_I32_B64:
7977 splitScalar64BitBCNT(Worklist, Inst);
7981 case AMDGPU::S_BFE_I64:
7982 splitScalar64BitBFE(Worklist, Inst);
7986 case AMDGPU::S_FLBIT_I32_B64:
7987 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7990 case AMDGPU::S_FF1_I32_B64:
7991 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7995 case AMDGPU::S_LSHL_B32:
7996 if (ST.hasOnlyRevVALUShifts()) {
7997 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
8001 case AMDGPU::S_ASHR_I32:
8002 if (ST.hasOnlyRevVALUShifts()) {
8003 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
8007 case AMDGPU::S_LSHR_B32:
8008 if (ST.hasOnlyRevVALUShifts()) {
8009 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
8013 case AMDGPU::S_LSHL_B64:
8014 if (ST.hasOnlyRevVALUShifts()) {
8016 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
8017 : AMDGPU::V_LSHLREV_B64_e64;
8021 case AMDGPU::S_ASHR_I64:
8022 if (ST.hasOnlyRevVALUShifts()) {
8023 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
8027 case AMDGPU::S_LSHR_B64:
8028 if (ST.hasOnlyRevVALUShifts()) {
8029 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
8034 case AMDGPU::S_ABS_I32:
8035 lowerScalarAbs(Worklist, Inst);
8039 case AMDGPU::S_ABSDIFF_I32:
8040 lowerScalarAbsDiff(Worklist, Inst);
8044 case AMDGPU::S_CBRANCH_SCC0:
8045 case AMDGPU::S_CBRANCH_SCC1: {
8048 bool IsSCC = CondReg == AMDGPU::SCC;
8056 case AMDGPU::S_BFE_U64:
8057 case AMDGPU::S_BFM_B64:
8060 case AMDGPU::S_PACK_LL_B32_B16:
8061 case AMDGPU::S_PACK_LH_B32_B16:
8062 case AMDGPU::S_PACK_HL_B32_B16:
8063 case AMDGPU::S_PACK_HH_B32_B16:
8064 movePackToVALU(Worklist, MRI, Inst);
8068 case AMDGPU::S_XNOR_B32:
8069 lowerScalarXnor(Worklist, Inst);
8073 case AMDGPU::S_NAND_B32:
8074 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8078 case AMDGPU::S_NOR_B32:
8079 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8083 case AMDGPU::S_ANDN2_B32:
8084 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8088 case AMDGPU::S_ORN2_B32:
8089 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8097 case AMDGPU::S_ADD_CO_PSEUDO:
8098 case AMDGPU::S_SUB_CO_PSEUDO: {
8099 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8100 ? AMDGPU::V_ADDC_U32_e64
8101 : AMDGPU::V_SUBB_U32_e64;
8102 const auto *CarryRC = RI.getWaveMaskRegClass();
8124 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8128 case AMDGPU::S_UADDO_PSEUDO:
8129 case AMDGPU::S_USUBO_PSEUDO: {
8135 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8136 ? AMDGPU::V_ADD_CO_U32_e64
8137 : AMDGPU::V_SUB_CO_U32_e64;
8149 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8153 case AMDGPU::S_LSHL1_ADD_U32:
8154 case AMDGPU::S_LSHL2_ADD_U32:
8155 case AMDGPU::S_LSHL3_ADD_U32:
8156 case AMDGPU::S_LSHL4_ADD_U32: {
8160 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8161 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8162 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8176 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8180 case AMDGPU::S_CSELECT_B32:
8181 case AMDGPU::S_CSELECT_B64:
8182 lowerSelect(Worklist, Inst, MDT);
8185 case AMDGPU::S_CMP_EQ_I32:
8186 case AMDGPU::S_CMP_LG_I32:
8187 case AMDGPU::S_CMP_GT_I32:
8188 case AMDGPU::S_CMP_GE_I32:
8189 case AMDGPU::S_CMP_LT_I32:
8190 case AMDGPU::S_CMP_LE_I32:
8191 case AMDGPU::S_CMP_EQ_U32:
8192 case AMDGPU::S_CMP_LG_U32:
8193 case AMDGPU::S_CMP_GT_U32:
8194 case AMDGPU::S_CMP_GE_U32:
8195 case AMDGPU::S_CMP_LT_U32:
8196 case AMDGPU::S_CMP_LE_U32:
8197 case AMDGPU::S_CMP_EQ_U64:
8198 case AMDGPU::S_CMP_LG_U64:
8199 case AMDGPU::S_CMP_LT_F32:
8200 case AMDGPU::S_CMP_EQ_F32:
8201 case AMDGPU::S_CMP_LE_F32:
8202 case AMDGPU::S_CMP_GT_F32:
8203 case AMDGPU::S_CMP_LG_F32:
8204 case AMDGPU::S_CMP_GE_F32:
8205 case AMDGPU::S_CMP_O_F32:
8206 case AMDGPU::S_CMP_U_F32:
8207 case AMDGPU::S_CMP_NGE_F32:
8208 case AMDGPU::S_CMP_NLG_F32:
8209 case AMDGPU::S_CMP_NGT_F32:
8210 case AMDGPU::S_CMP_NLE_F32:
8211 case AMDGPU::S_CMP_NEQ_F32:
8212 case AMDGPU::S_CMP_NLT_F32: {
8217 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8231 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8235 case AMDGPU::S_CMP_LT_F16:
8236 case AMDGPU::S_CMP_EQ_F16:
8237 case AMDGPU::S_CMP_LE_F16:
8238 case AMDGPU::S_CMP_GT_F16:
8239 case AMDGPU::S_CMP_LG_F16:
8240 case AMDGPU::S_CMP_GE_F16:
8241 case AMDGPU::S_CMP_O_F16:
8242 case AMDGPU::S_CMP_U_F16:
8243 case AMDGPU::S_CMP_NGE_F16:
8244 case AMDGPU::S_CMP_NLG_F16:
8245 case AMDGPU::S_CMP_NGT_F16:
8246 case AMDGPU::S_CMP_NLE_F16:
8247 case AMDGPU::S_CMP_NEQ_F16:
8248 case AMDGPU::S_CMP_NLT_F16: {
8271 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8275 case AMDGPU::S_CVT_HI_F32_F16: {
8278 if (ST.useRealTrue16Insts()) {
8283 .
addReg(TmpReg, {}, AMDGPU::hi16)
8299 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8303 case AMDGPU::S_MINIMUM_F32:
8304 case AMDGPU::S_MAXIMUM_F32: {
8316 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8320 case AMDGPU::S_MINIMUM_F16:
8321 case AMDGPU::S_MAXIMUM_F16: {
8323 ? &AMDGPU::VGPR_16RegClass
8324 : &AMDGPU::VGPR_32RegClass);
8336 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8340 case AMDGPU::V_S_EXP_F16_e64:
8341 case AMDGPU::V_S_LOG_F16_e64:
8342 case AMDGPU::V_S_RCP_F16_e64:
8343 case AMDGPU::V_S_RSQ_F16_e64:
8344 case AMDGPU::V_S_SQRT_F16_e64: {
8346 ? &AMDGPU::VGPR_16RegClass
8347 : &AMDGPU::VGPR_32RegClass);
8359 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8365 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8373 if (NewOpcode == Opcode) {
8380 V2SPhyCopiesToErase);
8388 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8395 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8426 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8430 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8436 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8443 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8445 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8450 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8458 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8468 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8469 AMDGPU::OpName::src0_modifiers) >= 0)
8473 NewInstr->addOperand(Src);
8476 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8479 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8481 NewInstr.addImm(
Size);
8482 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8486 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8491 "Scalar BFE is only implemented for constant width and offset");
8499 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8500 AMDGPU::OpName::src1_modifiers) >= 0)
8502 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8504 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8505 AMDGPU::OpName::src2_modifiers) >= 0)
8507 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8509 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8511 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8513 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8519 NewInstr->addOperand(
Op);
8526 if (
Op.getReg() == AMDGPU::SCC) {
8528 if (
Op.isDef() && !
Op.isDead())
8529 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8531 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8536 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8537 Register DstReg = NewInstr->getOperand(0).getReg();
8552 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8556std::pair<bool, MachineBasicBlock *>
8559 if (ST.hasAddNoCarryInsts()) {
8571 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8573 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8574 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8585 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8586 return std::pair(
true, NewBB);
8589 return std::pair(
false,
nullptr);
8606 bool IsSCC = (CondReg == AMDGPU::SCC);
8620 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8625 bool CopyFound =
false;
8626 for (MachineInstr &CandI :
8629 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8631 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8633 .
addReg(CandI.getOperand(1).getReg());
8645 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8654 MachineInstr *NewInst;
8655 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8656 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8671 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8686 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8687 : AMDGPU::V_SUB_CO_U32_e32;
8698 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8715 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8716 : AMDGPU::V_SUB_CO_U32_e32;
8729 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8743 if (ST.hasDLInsts()) {
8753 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8759 bool Src0IsSGPR = Src0.
isReg() &&
8761 bool Src1IsSGPR = Src1.
isReg() &&
8775 }
else if (Src1IsSGPR) {
8793 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8799 unsigned Opcode)
const {
8823 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8828 unsigned Opcode)
const {
8852 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8867 const MCInstrDesc &InstDesc =
get(Opcode);
8868 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8870 &AMDGPU::SGPR_32RegClass;
8872 const TargetRegisterClass *Src0SubRC =
8873 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8876 AMDGPU::sub0, Src0SubRC);
8879 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8880 const TargetRegisterClass *NewDestSubRC =
8881 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8884 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8887 AMDGPU::sub1, Src0SubRC);
8890 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8904 Worklist.
insert(&LoHalf);
8905 Worklist.
insert(&HiHalf);
8911 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8934 const TargetRegisterClass *Src0SubRC =
8935 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8936 if (RI.isSGPRClass(Src0SubRC))
8937 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8938 const TargetRegisterClass *Src1SubRC =
8939 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8940 if (RI.isSGPRClass(Src1SubRC))
8941 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8945 MachineOperand Op0L =
8947 MachineOperand Op1L =
8949 MachineOperand Op0H =
8951 MachineOperand Op1H =
8970 MachineInstr *Op1L_Op0H =
8976 MachineInstr *Op1H_Op0L =
8982 MachineInstr *Carry =
8987 MachineInstr *LoHalf =
8997 MachineInstr *HiHalf =
9020 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9043 const TargetRegisterClass *Src0SubRC =
9044 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9045 if (RI.isSGPRClass(Src0SubRC))
9046 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9047 const TargetRegisterClass *Src1SubRC =
9048 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9049 if (RI.isSGPRClass(Src1SubRC))
9050 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9054 MachineOperand Op0L =
9056 MachineOperand Op1L =
9060 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9061 ? AMDGPU::V_MUL_HI_U32_e64
9062 : AMDGPU::V_MUL_HI_I32_e64;
9063 MachineInstr *HiHalf =
9066 MachineInstr *LoHalf =
9085 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9101 const MCInstrDesc &InstDesc =
get(Opcode);
9102 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9104 &AMDGPU::SGPR_32RegClass;
9106 const TargetRegisterClass *Src0SubRC =
9107 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9108 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9110 &AMDGPU::SGPR_32RegClass;
9112 const TargetRegisterClass *Src1SubRC =
9113 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9116 AMDGPU::sub0, Src0SubRC);
9118 AMDGPU::sub0, Src1SubRC);
9120 AMDGPU::sub1, Src0SubRC);
9122 AMDGPU::sub1, Src1SubRC);
9125 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9126 const TargetRegisterClass *NewDestSubRC =
9127 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9130 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9135 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9148 Worklist.
insert(&LoHalf);
9149 Worklist.
insert(&HiHalf);
9152 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9172 MachineOperand* Op0;
9173 MachineOperand* Op1;
9175 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9208 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9209 const TargetRegisterClass *SrcRC = Src.isReg() ?
9211 &AMDGPU::SGPR_32RegClass;
9216 const TargetRegisterClass *SrcSubRC =
9217 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9220 AMDGPU::sub0, SrcSubRC);
9222 AMDGPU::sub1, SrcSubRC);
9232 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9251 Offset == 0 &&
"Not implemented");
9274 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9284 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9287 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9293 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9312 const MCInstrDesc &InstDesc =
get(Opcode);
9314 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9315 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9316 : AMDGPU::V_ADD_CO_U32_e32;
9318 const TargetRegisterClass *SrcRC =
9319 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9320 const TargetRegisterClass *SrcSubRC =
9321 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9323 MachineOperand SrcRegSub0 =
9325 MachineOperand SrcRegSub1 =
9338 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9344 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9348 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9351void SIInstrInfo::addUsersToMoveToVALUWorklist(
9355 MachineInstr &
UseMI = *MO.getParent();
9359 switch (
UseMI.getOpcode()) {
9362 case AMDGPU::SOFT_WQM:
9363 case AMDGPU::STRICT_WWM:
9364 case AMDGPU::STRICT_WQM:
9365 case AMDGPU::REG_SEQUENCE:
9367 case AMDGPU::INSERT_SUBREG:
9370 OpNo = MO.getOperandNo();
9377 if (!RI.hasVectorRegisters(OpRC))
9394 if (ST.useRealTrue16Insts()) {
9396 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9399 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9405 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9408 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9417 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9419 case AMDGPU::S_PACK_LL_B32_B16:
9421 .addReg(SrcReg0, {},
9422 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9423 .addImm(AMDGPU::lo16)
9424 .addReg(SrcReg1, {},
9425 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9426 .addImm(AMDGPU::hi16);
9428 case AMDGPU::S_PACK_LH_B32_B16:
9430 .addReg(SrcReg0, {},
9431 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9432 .addImm(AMDGPU::lo16)
9433 .addReg(SrcReg1, {}, AMDGPU::hi16)
9434 .addImm(AMDGPU::hi16);
9436 case AMDGPU::S_PACK_HL_B32_B16:
9437 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9438 .addImm(AMDGPU::lo16)
9439 .addReg(SrcReg1, {},
9440 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9441 .addImm(AMDGPU::hi16);
9443 case AMDGPU::S_PACK_HH_B32_B16:
9444 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9445 .addImm(AMDGPU::lo16)
9446 .addReg(SrcReg1, {}, AMDGPU::hi16)
9447 .addImm(AMDGPU::hi16);
9455 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9460 case AMDGPU::S_PACK_LL_B32_B16: {
9479 case AMDGPU::S_PACK_LH_B32_B16: {
9489 case AMDGPU::S_PACK_HL_B32_B16: {
9500 case AMDGPU::S_PACK_HH_B32_B16: {
9520 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9529 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9530 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9531 SmallVector<MachineInstr *, 4> CopyToDelete;
9534 for (MachineInstr &
MI :
9538 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9541 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9542 Register DestReg =
MI.getOperand(0).getReg();
9549 MI.getOperand(SCCIdx).setReg(NewCond);
9555 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9558 for (
auto &Copy : CopyToDelete)
9559 Copy->eraseFromParent();
9567void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9573 for (MachineInstr &
MI :
9576 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9578 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9587 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9595 case AMDGPU::REG_SEQUENCE:
9596 case AMDGPU::INSERT_SUBREG:
9598 case AMDGPU::SOFT_WQM:
9599 case AMDGPU::STRICT_WWM:
9600 case AMDGPU::STRICT_WQM: {
9602 if (RI.isAGPRClass(SrcRC)) {
9603 if (RI.isAGPRClass(NewDstRC))
9608 case AMDGPU::REG_SEQUENCE:
9609 case AMDGPU::INSERT_SUBREG:
9610 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9613 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9619 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9622 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9636 int OpIndices[3])
const {
9637 const MCInstrDesc &
Desc =
MI.getDesc();
9653 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9655 for (
unsigned i = 0; i < 3; ++i) {
9656 int Idx = OpIndices[i];
9660 const MachineOperand &MO =
MI.getOperand(Idx);
9666 const TargetRegisterClass *OpRC =
9667 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9668 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9675 if (RI.isSGPRClass(RegRC))
9693 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9694 SGPRReg = UsedSGPRs[0];
9697 if (!SGPRReg && UsedSGPRs[1]) {
9698 if (UsedSGPRs[1] == UsedSGPRs[2])
9699 SGPRReg = UsedSGPRs[1];
9706 AMDGPU::OpName OperandName)
const {
9707 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9710 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9714 return &
MI.getOperand(Idx);
9728 if (ST.isAmdHsaOS()) {
9731 RsrcDataFormat |= (1ULL << 56);
9736 RsrcDataFormat |= (2ULL << 59);
9739 return RsrcDataFormat;
9749 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9754 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9761 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9767 unsigned Opc =
MI.getOpcode();
9773 return get(
Opc).mayLoad() &&
9780 if (!Addr || !Addr->
isFI())
9789 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9791 return MI.getOperand(VDataIdx).getReg();
9801 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9803 return MI.getOperand(DataIdx).getReg();
9837 unsigned Opc =
MI.getOpcode();
9839 unsigned DescSize =
Desc.getSize();
9844 unsigned Size = DescSize;
9848 if (
MI.isBranch() && ST.hasOffset3fBug())
9859 bool HasLiteral =
false;
9860 unsigned LiteralSize = 4;
9861 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9866 if (ST.has64BitLiterals()) {
9867 switch (OpInfo.OperandType) {
9892 return HasLiteral ? DescSize + LiteralSize : DescSize;
9897 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9901 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9902 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9906 case TargetOpcode::BUNDLE:
9907 return getInstBundleSize(
MI);
9908 case TargetOpcode::INLINEASM:
9909 case TargetOpcode::INLINEASM_BR: {
9911 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9915 if (
MI.isMetaInstruction())
9919 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9922 unsigned LoInstOpcode = D16Info->LoOp;
9924 DescSize =
Desc.getSize();
9928 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9931 DescSize =
Desc.getSize();
9940 if (
MI.isBranch() && ST.hasOffset3fBug())
9941 return InstSizeVerifyMode::NoVerify;
9942 return InstSizeVerifyMode::ExactSize;
9949 if (
MI.memoperands_empty())
9961 static const std::pair<int, const char *> TargetIndices[] = {
10000std::pair<unsigned, unsigned>
10007 static const std::pair<unsigned, const char *> TargetFlags[] = {
10025 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10041 return AMDGPU::WWM_COPY;
10043 return AMDGPU::COPY;
10060 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10064 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10065 return IsLRSplitInst;
10078 bool IsNullOrVectorRegister =
true;
10082 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10085 return IsNullOrVectorRegister &&
10087 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10088 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10096 if (ST.hasAddNoCarryInsts())
10112 if (ST.hasAddNoCarryInsts())
10116 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10118 : RS.scavengeRegisterBackwards(
10119 *RI.getBoolRC(),
I,
false,
10132 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10133 case AMDGPU::SI_KILL_I1_TERMINATOR:
10142 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10143 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10144 case AMDGPU::SI_KILL_I1_PSEUDO:
10145 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10157 const unsigned OffsetBits =
10159 return (1 << OffsetBits) - 1;
10163 if (!ST.isWave32())
10166 if (
MI.isInlineAsm())
10169 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10172 for (
auto &
Op :
MI.implicit_operands()) {
10173 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10174 Op.setReg(AMDGPU::VCC_LO);
10183 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10187 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10188 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10204 if (Imm > MaxImm) {
10205 if (Imm <= MaxImm + 64) {
10207 Overflow = Imm - MaxImm;
10226 if (Overflow > 0) {
10234 if (ST.hasRestrictedSOffset())
10239 SOffset = Overflow;
10277 if (!ST.hasFlatInstOffsets())
10281 if (ST.hasFlatSegmentOffsetBug() && FlatVariant == FlatAddrSpace::FLAT &&
10286 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10287 FlatVariant == FlatAddrSpace::FlatScratch &&
Offset < 0 &&
10298std::pair<int64_t, int64_t>
10301 int64_t RemainderOffset = COffsetVal;
10302 int64_t ImmField = 0;
10307 if (AllowNegative) {
10309 int64_t
D = 1LL << NumBits;
10310 RemainderOffset = (COffsetVal /
D) *
D;
10311 ImmField = COffsetVal - RemainderOffset;
10313 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10315 (ImmField % 4) != 0) {
10317 RemainderOffset += ImmField % 4;
10318 ImmField -= ImmField % 4;
10320 }
else if (COffsetVal >= 0) {
10322 RemainderOffset = COffsetVal - ImmField;
10326 assert(RemainderOffset + ImmField == COffsetVal);
10327 return {ImmField, RemainderOffset};
10332 if (ST.hasNegativeScratchOffsetBug() &&
10340 switch (ST.getGeneration()) {
10369 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10370 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10371 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10372 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10373 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10374 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10375 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10376 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10383#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10384 case OPCODE##_dpp: \
10385 case OPCODE##_e32: \
10386 case OPCODE##_e64: \
10387 case OPCODE##_e64_dpp: \
10388 case OPCODE##_sdwa:
10402 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10403 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10404 case AMDGPU::V_FMA_F16_gfx9_e64:
10405 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10406 case AMDGPU::V_INTERP_P2_F16:
10407 case AMDGPU::V_MAD_F16_e64:
10408 case AMDGPU::V_MAD_U16_e64:
10409 case AMDGPU::V_MAD_I16_e64:
10418 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10432 switch (ST.getGeneration()) {
10445 if (
isMAI(Opcode)) {
10453 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10456 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10463 if (ST.hasGFX90AInsts()) {
10464 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10465 if (ST.hasGFX940Insts())
10467 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10469 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10471 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10477 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10496 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10497 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10498 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10510 switch (
MI.getOpcode()) {
10512 case AMDGPU::REG_SEQUENCE:
10516 case AMDGPU::INSERT_SUBREG:
10517 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10534 if (!
P.Reg.isVirtual())
10539 while (
auto *
MI = DefInst) {
10541 switch (
MI->getOpcode()) {
10543 case AMDGPU::V_MOV_B32_e32: {
10544 auto &Op1 =
MI->getOperand(1);
10573 auto *DefBB =
DefMI.getParent();
10577 if (
UseMI.getParent() != DefBB)
10580 const int MaxInstScan = 20;
10584 auto E =
UseMI.getIterator();
10585 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10586 if (
I->isDebugInstr())
10589 if (++NumInst > MaxInstScan)
10592 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10605 auto *DefBB =
DefMI.getParent();
10607 const int MaxUseScan = 10;
10611 auto &UseInst = *
Use.getParent();
10614 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10617 if (++NumUse > MaxUseScan)
10624 const int MaxInstScan = 20;
10628 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10631 if (
I->isDebugInstr())
10634 if (++NumInst > MaxInstScan)
10647 if (Reg == VReg && --NumUse == 0)
10649 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10658 auto Cur =
MBB.begin();
10659 if (Cur !=
MBB.end())
10661 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10664 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10673 if (InsPt !=
MBB.end() &&
10674 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10675 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10676 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10677 InsPt->definesRegister(Src,
nullptr)) {
10681 .
addReg(Src, {}, SrcSubReg)
10724 if (isFullCopyInstr(
MI)) {
10725 Register DstReg =
MI.getOperand(0).getReg();
10726 Register SrcReg =
MI.getOperand(1).getReg();
10748 unsigned *PredCost)
const {
10749 if (
MI.isBundle()) {
10752 unsigned Lat = 0,
Count = 0;
10753 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10755 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10757 return Lat +
Count - 1;
10760 return SchedModel.computeInstrLatency(&
MI);
10767 return *CallAddrOp;
10774 unsigned Opcode =
MI.getOpcode();
10776 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10779 :
MI.getOperand(1).getReg();
10783 unsigned SrcAS = SrcTy.getAddressSpace();
10786 ST.hasGloballyAddressableScratch()
10794 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10795 return HandleAddrSpaceCast(
MI);
10798 auto IID = GI->getIntrinsicID();
10805 case Intrinsic::amdgcn_addrspacecast_nonnull:
10806 return HandleAddrSpaceCast(
MI);
10807 case Intrinsic::amdgcn_if:
10808 case Intrinsic::amdgcn_else:
10822 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10823 Opcode == AMDGPU::G_SEXTLOAD) {
10824 if (
MI.memoperands_empty())
10828 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10829 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10837 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10838 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10839 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10845 if (Opcode == TargetOpcode::G_DYN_STACKALLOC)
10848 if (Opcode == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
10856 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10857 return Formatter.get();
10865 unsigned opcode =
MI.getOpcode();
10866 if (opcode == AMDGPU::V_READLANE_B32 ||
10867 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10868 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10873 if (
MI.isInlineAsm()) {
10879 if (!RC || !RI.isSGPRClass(RC))
10884 if (isCopyInstr(
MI)) {
10888 RI.getPhysRegBaseClass(srcOp.
getReg());
10896 if (
MI.isPreISelOpcode())
10911 if (
MI.memoperands_empty())
10915 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10916 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10931 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10933 if (!
SrcOp.isReg())
10937 if (!Reg || !
SrcOp.readsReg())
10943 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
10970 F,
"ds_ordered_count unsupported for this calling conv"));
10984 Register &SrcReg2, int64_t &CmpMask,
10985 int64_t &CmpValue)
const {
10986 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
10989 switch (
MI.getOpcode()) {
10992 case AMDGPU::S_CMP_EQ_U32:
10993 case AMDGPU::S_CMP_EQ_I32:
10994 case AMDGPU::S_CMP_LG_U32:
10995 case AMDGPU::S_CMP_LG_I32:
10996 case AMDGPU::S_CMP_LT_U32:
10997 case AMDGPU::S_CMP_LT_I32:
10998 case AMDGPU::S_CMP_GT_U32:
10999 case AMDGPU::S_CMP_GT_I32:
11000 case AMDGPU::S_CMP_LE_U32:
11001 case AMDGPU::S_CMP_LE_I32:
11002 case AMDGPU::S_CMP_GE_U32:
11003 case AMDGPU::S_CMP_GE_I32:
11004 case AMDGPU::S_CMP_EQ_U64:
11005 case AMDGPU::S_CMP_LG_U64:
11006 SrcReg =
MI.getOperand(0).getReg();
11007 if (
MI.getOperand(1).isReg()) {
11008 if (
MI.getOperand(1).getSubReg())
11010 SrcReg2 =
MI.getOperand(1).getReg();
11012 }
else if (
MI.getOperand(1).isImm()) {
11014 CmpValue =
MI.getOperand(1).getImm();
11020 case AMDGPU::S_CMPK_EQ_U32:
11021 case AMDGPU::S_CMPK_EQ_I32:
11022 case AMDGPU::S_CMPK_LG_U32:
11023 case AMDGPU::S_CMPK_LG_I32:
11024 case AMDGPU::S_CMPK_LT_U32:
11025 case AMDGPU::S_CMPK_LT_I32:
11026 case AMDGPU::S_CMPK_GT_U32:
11027 case AMDGPU::S_CMPK_GT_I32:
11028 case AMDGPU::S_CMPK_LE_U32:
11029 case AMDGPU::S_CMPK_LE_I32:
11030 case AMDGPU::S_CMPK_GE_U32:
11031 case AMDGPU::S_CMPK_GE_I32:
11032 SrcReg =
MI.getOperand(0).getReg();
11034 CmpValue =
MI.getOperand(1).getImm();
11044 if (S->isLiveIn(AMDGPU::SCC))
11053bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
11056 bool SCCIsDead =
false;
11059 constexpr unsigned ScanLimit = 12;
11060 unsigned Count = 0;
11061 for (MachineInstr &
MI :
11063 if (++
Count > ScanLimit)
11065 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
11066 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
11067 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
11068 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11069 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11074 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11087 for (MachineInstr *
MI : InvertInstr) {
11088 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11089 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11091 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11092 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11093 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11094 ? AMDGPU::S_CBRANCH_SCC1
11095 : AMDGPU::S_CBRANCH_SCC0));
11108 bool NeedInversion)
const {
11109 MachineInstr *KillsSCC =
nullptr;
11114 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11116 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11119 if (NeedInversion && !invertSCCUse(SCCRedefine))
11121 if (MachineOperand *SccDef =
11123 SccDef->setIsDead(
false);
11131 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11132 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11134 bool Op1IsNonZeroImm =
11135 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11136 bool Op2IsZeroImm =
11137 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11138 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11144 unsigned &NewDefOpc) {
11147 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11148 Def.getOpcode() != AMDGPU::S_ADD_U32)
11154 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11160 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11162 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11165 NewDefOpc = AMDGPU::S_ADD_U32;
11167 NeedInversion = !NeedInversion;
11172 Register SrcReg2, int64_t CmpMask,
11181 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11182 this](
bool NeedInversion) ->
bool {
11206 unsigned NewDefOpc = Def->getOpcode();
11212 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11215 if (NewDefOpc != Def->getOpcode())
11216 Def->setDesc(
get(NewDefOpc));
11225 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11232 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11240 optimizeSCC(
Select, Def,
false);
11247 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11248 this](int64_t ExpectedValue,
unsigned SrcSize,
11249 bool IsReversible,
bool IsSigned) ->
bool {
11277 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11278 Def->getOpcode() != AMDGPU::S_AND_B64)
11282 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11293 SrcOp = &Def->getOperand(2);
11294 else if (isMask(&Def->getOperand(2)))
11295 SrcOp = &Def->getOperand(1);
11303 if (IsSigned && BitNo == SrcSize - 1)
11306 ExpectedValue <<= BitNo;
11308 bool IsReversedCC =
false;
11309 if (CmpValue != ExpectedValue) {
11312 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11317 Register DefReg = Def->getOperand(0).getReg();
11321 if (!optimizeSCC(Def, &CmpInstr,
false))
11332 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11333 : AMDGPU::S_BITCMP1_B32
11334 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11335 : AMDGPU::S_BITCMP1_B64;
11340 Def->eraseFromParent();
11348 case AMDGPU::S_CMP_EQ_U32:
11349 case AMDGPU::S_CMP_EQ_I32:
11350 case AMDGPU::S_CMPK_EQ_U32:
11351 case AMDGPU::S_CMPK_EQ_I32:
11352 return optimizeCmpAnd(1, 32,
true,
false) ||
11353 optimizeCmpSelect(
true);
11354 case AMDGPU::S_CMP_GE_U32:
11355 case AMDGPU::S_CMPK_GE_U32:
11356 return optimizeCmpAnd(1, 32,
false,
false);
11357 case AMDGPU::S_CMP_GE_I32:
11358 case AMDGPU::S_CMPK_GE_I32:
11359 return optimizeCmpAnd(1, 32,
false,
true);
11360 case AMDGPU::S_CMP_EQ_U64:
11361 return optimizeCmpAnd(1, 64,
true,
false);
11362 case AMDGPU::S_CMP_LG_U32:
11363 case AMDGPU::S_CMP_LG_I32:
11364 case AMDGPU::S_CMPK_LG_U32:
11365 case AMDGPU::S_CMPK_LG_I32:
11366 return optimizeCmpAnd(0, 32,
true,
false) ||
11367 optimizeCmpSelect(
false);
11368 case AMDGPU::S_CMP_GT_U32:
11369 case AMDGPU::S_CMPK_GT_U32:
11370 return optimizeCmpAnd(0, 32,
false,
false);
11371 case AMDGPU::S_CMP_GT_I32:
11372 case AMDGPU::S_CMPK_GT_I32:
11373 return optimizeCmpAnd(0, 32,
false,
true);
11374 case AMDGPU::S_CMP_LG_U64:
11375 return optimizeCmpAnd(0, 64,
true,
false) ||
11376 optimizeCmpSelect(
false);
11383 AMDGPU::OpName
OpName)
const {
11384 if (!ST.needsAlignedVGPRs())
11387 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11399 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11401 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11405 : &AMDGPU::VReg_64_Align2RegClass);
11407 .
addReg(DataReg, {},
Op.getSubReg())
11412 Op.setSubReg(AMDGPU::sub0);
11427 if (ST.hasGFX1250Insts())
11434 unsigned Opcode =
MI.getOpcode();
11440 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11441 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11444 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={})
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static MachineBasicBlock * generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr, ArrayRef< Register > PhySGPRs={})
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static unsigned getSGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static unsigned getAVSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getVGPRSpillSaveOpcode(unsigned Size, bool NeedsCFI)
static constexpr AMDGPU::OpName ModifierOpNames[]
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
uint64_t getZExtValue() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
void storeRegToStackSlotCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given opcode.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
static bool isXcntDrain(const MachineInstr &MI)
True if MI implicitly drains XCNT.
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
InstSizeVerifyMode getInstSizeVerifyMode(const MachineInstr &MI) const override
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool hasRAWDependency(const MachineInstr &FirstMI, const MachineInstr &SecondMI) const
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
void handleCopyToPhysHelper(SIInstrWorklist &Worklist, Register DstReg, MachineInstr &Inst, MachineRegisterInfo &MRI, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool isVOPDAntidependencyAllowed(const MachineInstr &MI) const
If OpX is multicycle, anti-dependencies are not allowed.
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
static bool isVALU(const MachineInstr &MI, bool AllowLDSDMA)
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void createWaterFallForSiCall(MachineInstr *MI, MachineDominatorTree *MDT, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={}) const
Wrapper function for generating a waterfall for instruction MI. This function takes into consideration of...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
bool isLegalGFX12PlusPackedMathFP32or64BitOperand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 or 64 instructions.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI, bool NeedsCFI) const
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, AMDGPU::FlatAddrSpace FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void createReadFirstLaneFromCopyToPhysReg(MachineRegisterInfo &MRI, Register DstReg, MachineInstr &Inst) const
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
bool allowNegativeFlatOffset(AMDGPU::FlatAddrSpace FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo & getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool isPackedFP32or64BitInst(unsigned Opc)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT64
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
constexpr bool isD16Buf(const T &...O)
constexpr bool isSDWA(const T &...O)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer that is less than or equal to Value and is congruent to Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.