#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
  int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName);
  int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName);

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
    return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())
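// resultDependsOnExec() reports whether the instruction's result is sensitive
// to the current EXEC mask, e.g. a compare whose only users are
// S_AND_SAVEEXEC/S_AND of EXEC.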
bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const {
  if (MI.isCompare()) {
      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)

  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
        RI.isSGPRClass(MRI.getRegClass(Op.getReg()))) {

      if (FromCycle == nullptr)

      while (FromCycle && !FromCycle->contains(ToCycle)) {
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    if (!Load0Offset || !Load1Offset)

    int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
    int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:
  OffsetIsScalable = false;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
    if (Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);

      Offset = EltSize * Offset0;

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    if (DataOpIdx == -1) {
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0);
      DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RsrcOpName);
    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);

    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
                              const char *Msg = "illegal VGPR to SGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");

  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

      bool SafeToPropagate = true;
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      for (auto I = Def; I != MI; ++I)
        I->clearRegisterKills(DefOp.getReg(), &RI);

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  RS.enterBasicBlockEnd(MBB);
  RS.backward(std::next(MI));
  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

  assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
         "VGPR used for an intermediate copy should have been reserved.");

  Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
    LastMI->addRegisterKilled(SrcReg, &RI);
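// copyPhysReg() lowers a COPY between physical registers: it picks the mov
// flavour (SGPR, VGPR, AGPR, 16-bit and 64-bit variants) from the source and
// destination register classes and splits wide copies into subregister moves.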
                              Register SrcReg, bool KillSrc, bool RenamableDest,
                              bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    assert(ST.useRealTrue16Insts());
    if (DestReg == SrcReg) {
    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC_LO) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {
    if (!AMDGPU::SReg_64_EncodableRegClass.contains(SrcReg)) {
      if (DestReg == AMDGPU::VCC) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
      assert(ST.hasScalarCompareEq64());

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    if (AMDGPU::AGPR_32RegClass.contains(SrcReg) && ST.hasGFX90AInsts()) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    MCRegister NewDestReg = RI.get32BitRegister(DestReg);
    MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
             "Cannot use hi16 subreg with an AGPR!");

    if (ST.useRealTrue16Insts()) {
      if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
          (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {

    if (IsSGPRSrc && !ST.hasSDWAScalar()) {
      if (!DstLow || !SrcLow) {
             "Cannot use hi16 subreg on VI!");
  if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
    if (ST.hasMovB64()) {
    if (ST.hasPkMovB32()) {

  const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
  if (RI.isSGPRClass(RC)) {
    if (!RI.isSGPRClass(SrcRC)) {
    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.isAGPRClass(RC)) {
    if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
    else if (RI.hasVGPRs(SrcRC) ||
             (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
  } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
             (RI.isProperlyAlignedRC(*RC) &&
              (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
    if (ST.hasMovB64()) {
      Opcode = AMDGPU::V_MOV_B64_e32;
    } else if (ST.hasPkMovB32()) {
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
      Register SReg2 = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());

  Register Reg = MRI.createVirtualRegister(RI.getBoolRC());
                                  int64_t &ImmVal) const {
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOVK_I32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B16_t16_e32: {
    return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::V_MOV_B16_t16_e64: {
    if (Src0.isImm() && !MI.getOperand(1).getImm()) {
      return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_BREV_B32:
  case AMDGPU::V_BFREV_B32_e32:
  case AMDGPU::V_BFREV_B32_e64: {
    return MI.getOperand(0).getReg() == Reg;
  case AMDGPU::S_NOT_B32:
  case AMDGPU::V_NOT_B32_e32:
  case AMDGPU::V_NOT_B32_e64: {
    ImmVal = static_cast<int64_t>(~static_cast<int32_t>(Src0.getImm()));
    return MI.getOperand(0).getReg() == Reg;

std::optional<int64_t>
  if (!Op.isReg() || !Op.getReg().isVirtual())
    return std::nullopt;
  if (Def && Def->isMoveImmediate()) {
  return std::nullopt;

  if (RI.isAGPRClass(DstRC))
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;
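// Selection of the indirect-indexing pseudo opcodes below: the *_Vn variant
// is chosen from the vector size in 32-bit lanes (V1..V32).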
                                                     bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                              bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V16_SAVE;
    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
  return AMDGPU::SI_SPILL_WWM_V32_SAVE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = RI.getSpillSize(*RC);

  if (RI.isSGPRClass(RC)) {
    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V16_RESTORE;
    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
  bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
  if (ST.hasMAIInsts())
  assert(!RI.isAGPRClass(RC));

  unsigned SpillSize = RI.getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

  if (RI.isSGPRClass(RC)) {
    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

    if (RI.spillSGPRToVGPR())
                             unsigned Quantity) const {
  unsigned MaxSNopCount = 1u << ST.getSNopBits();
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, MaxSNopCount);

  auto *MF = MBB.getParent();
  assert(Info->isEntryFunction());

  if (MBB.succ_empty()) {
    bool HasNoTerminator = MBB.getFirstTerminator() == MBB.end();
    if (HasNoTerminator) {
      if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;

  if (!MBB.succ_empty() || std::next(MI.getIterator()) != MBB.end()) {
    MBB.splitAt(MI, false);

    MBB.addSuccessor(TrapBB);

  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  return MBB.getNextNode();
  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;
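// expandPostRAPseudo(): terminator and move pseudos are rewritten to their
// real encodings; the larger pseudos further down are expanded into
// instruction sequences.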
  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
        get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
    int64_t Imm = MI.getOperand(1).getImm();
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    if (ST.hasMovB64()) {
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
      if (ST.hasPkMovB32() &&
    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
    if (ST.has64BitLiterals()) {
      MI.setDesc(get(AMDGPU::S_MOV_B64));
      MI.setDesc(get(AMDGPU::S_MOV_B64));
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
    if (RI.hasVGPRs(EltRC)) {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    assert(ST.useVGPRIndexMode());
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
    if (ST.hasGetPCZeroExtension()) {
          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));

        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
    Op.setOffset(Op.getOffset() + 4);
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(Op));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
  case AMDGPU::ENTER_STRICT_WQM: {
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();
  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));
  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
    if (ST.hasGetPCZeroExtension()) {
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
  case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
    assert(ST.hasBF16PackedInsts());
    MI.setDesc(get(AMDGPU::V_PK_MAX_NUM_BF16));
  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    unsigned SubregSize = RI.getSubRegIdxSize(UseMO->getSubReg());

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
    OffsetMO->setImm(FinalOffset);
    MI->setMemRefs(*MF, NewMMOs);
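// expandMovDPP64() splits a 64-bit DPP move into two 32-bit DPP moves, one
// per sub-register, unless the subtarget supports V_MOV_B64_dpp directly.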
std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)
  return std::nullopt;

                                      AMDGPU::OpName Src0OpName,
                                      AMDGPU::OpName Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

  int64_t NonRegVal = NonRegOp1.getImm();
  NonRegOp2.setImm(NonRegVal);
                                                    unsigned OpIdx1) const {
  unsigned Opc = MI.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  if ((int)OpIdx0 == Src0Idx && !MO0.isReg() &&
  if ((int)OpIdx1 == Src0Idx && !MO1.isReg() &&

  if ((int)OpIdx1 != Src0Idx && MO0.isReg()) {
  if ((int)OpIdx0 != Src0Idx && MO1.isReg()) {

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
             static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                       Src1, AMDGPU::OpName::src1_modifiers);
                       AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();
  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
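// Long-branch expansion: when a branch offset does not fit in the encoding,
// the destination address is materialized via S_GETPC_B64 plus a relocated
// offset, using either the reserved long-branch SGPR pair or a scavenged one.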
                                        int64_t BrOffset) const {

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

  if (ST.useAddPC64Inst()) {
        MCCtx.createTempSymbol("offset", true);
        MCCtx.createTempSymbol("post_addpc", true);
    AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
    Offset->setVariableValue(OffsetExpr);

  assert(RS && "RegScavenger required for long branching");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
                               ST.hasVALUReadSGPRHazard();
  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, {}, AMDGPU::sub0)
      .addReg(PCReg, {}, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    RS->enterBasicBlock(MBB);
    Scav = LongBranchReservedReg;
    RS->enterBasicBlockEnd(MBB);
    Scav = RS->scavengeRegisterBackwards(

    RS->setRegUsed(Scav);
    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I == MBB.end()) {

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {
      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

      *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;

    *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {
                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.isSGPRClass(RC);

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, {}, SubIdx)
          .addReg(TrueReg, {}, SubIdx);
          .addReg(TrueReg, {}, SubIdx)
          .addReg(FalseReg, {}, SubIdx);
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:
  case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
  case AMDGPU::AV_MOV_B64_IMM_PSEUDO:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
  unsigned Opc = MI.getOpcode();
    int Idx = AMDGPU::getNamedOperandIdx(Opc, Name);
      MI.removeOperand(Idx);

  MI.setDesc(NewDesc);

  unsigned NumOps = Desc.getNumOperands() + Desc.implicit_uses().size() +
                    Desc.implicit_defs().size();
  for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
    MI.removeOperand(I);

                                              unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case AMDGPU::NoSubRegister:
  case AMDGPU::sub1_lo16:
  case AMDGPU::sub1_hi16:
  return std::nullopt;
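// Mapping of MAC/FMA opcodes to their MADAK/FMAAK (literal in src2) and
// MADMK/FMAMK (literal in src1) forms, used when folding immediates below.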
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADAK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADAK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAAK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAAK_F16_t16
                                        : AMDGPU::V_FMAAK_F16_fake16
                                  : AMDGPU::V_FMAAK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAAK_F64;

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAD_F16_e64:
    return AMDGPU::V_MADMK_F16;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAD_F32_e64:
    return AMDGPU::V_MADMK_F32;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMA_F32_e64:
    return AMDGPU::V_FMAMK_F32;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F16_t16_e32:
  case AMDGPU::V_FMAC_F16_fake16_e32:
  case AMDGPU::V_FMA_F16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMAMK_F16_t16
                                        : AMDGPU::V_FMAMK_F16_fake16
                                  : AMDGPU::V_FMAMK_F16;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMA_F64_e64:
    return AMDGPU::V_FMAMK_F64;
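// foldImmediate(): tries to fold an immediate-defining DefMI into UseMI,
// either by turning a COPY into a move-immediate or by rewriting MAC/FMA
// users into the MADMK/MADAK style forms selected above.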
  const bool HasMultipleUses = !MRI->hasOneNonDBGUse(Reg);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    if (HasMultipleUses) {
      unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->getRegClass(Reg));
      if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
      if (ImmDefSize == 32 &&

    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
                   RI.getSubRegIdxSize(UseSubReg) == 16;

    if (RI.hasVGPRs(DstRC))
      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)

    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;

    for (unsigned MovOp :
         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {

        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
        if (MovDstPhysReg) {
              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);

      if (MovDstPhysReg) {
        if (!MovDstRC->contains(MovDstPhysReg))
      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {

      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&

    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)

    UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
      UseMI.getOperand(0).setReg(MovDstPhysReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
    UseMI.addImplicitDefUseOperands(*MF);
  if (HasMultipleUses)

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMA_F64_e64 ||
      Opc == AMDGPU::V_FMAC_F64_e64) {

    int Src0Idx = getNamedOperandIdx(UseMI.getOpcode(), AMDGPU::OpName::src0);

    auto CopyRegOperandToNarrowerRC =
          if (!MI.getOperand(OpNo).isReg())
          if (RI.getCommonSubClass(RC, NewRC) != NewRC)
          BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
                  get(AMDGPU::COPY), Tmp)
          MI.getOperand(OpNo).setReg(Tmp);
          MI.getOperand(OpNo).setIsKill();

          Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
      if (!RegSrc->isReg())
      if (RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())) &&
          ST.getConstantBusLimit(Opc) < 2)

      if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))

      if (Def && Def->isMoveImmediate() &&

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 3, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
      if (ST.getConstantBusLimit(Opc) < 2) {

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&
        } else if (ST.getConstantBusLimit(Opc) <= 1 &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            MRI->hasOneNonDBGUse(Src1->getReg()) && commuteInstruction(UseMI))
        else if (RI.isSGPRReg(*MRI, Src1->getReg()))

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64 || Opc == AMDGPU::V_FMAC_F64_e64)
        UseMI.untieRegOperand(
            AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));

      const std::optional<int64_t> SubRegImm =

      if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
          NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
                UseMI.getDebugLoc(), get(AMDGPU::COPY),
                UseMI.getOperand(0).getReg())
        UseMI.getOperand(0).setReg(Tmp);
        CopyRegOperandToNarrowerRC(UseMI, 1, NewRC);
        CopyRegOperandToNarrowerRC(UseMI, 2, NewRC);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();
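// Memory disjointness helpers: two memory operations are trivially disjoint
// when they have identical base operands and their offset ranges do not
// overlap.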
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;
  LocationSize Width0 = MIa.memoperands().front()->getSize();
  LocationSize Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
    if (Op.isReg() && Op.isKill())

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
    return AMDGPU::V_MAD_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
                                        ? AMDGPU::V_FMA_F16_gfx9_t16_e64
                                        : AMDGPU::V_FMA_F16_gfx9_fake16_e64
                                  : AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F64_e32:
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
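// convertToThreeAddress(): wraps the Impl variant below, additionally
// unpacking single-instruction bundles and updating LiveIntervals/slot
// indexes and any use that becomes dead in the process.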
  if (MI.isBundle()) {
    if (MI.getBundleSize() != 1)
    CandidateMI = MI.getNextNode();

  MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);

  if (MI.isBundle()) {
      MI.untieRegOperand(MO.getOperandNo());

    if (Def.isEarlyClobber() && Def.isReg() &&

    auto UpdateDefIndex = [&](LiveRange &LR) {
      auto *S = LR.find(OldIndex);
      if (S != LR.end() && S->start == OldIndex) {
        assert(S->valno && S->valno->def == OldIndex);
        S->start = NewIndex;
        S->valno->def = NewIndex;

    for (auto &SR : LI.subranges())

  if (U.RemoveMIUse) {
    Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
    if (MRI.hasOneNonDBGUse(DefReg)) {
      U.RemoveMIUse->setDesc(get(AMDGPU::IMPLICIT_DEF));
      U.RemoveMIUse->getOperand(0).setIsDead(true);
      for (unsigned I = U.RemoveMIUse->getNumOperands() - 1; I != 0; --I)
        U.RemoveMIUse->removeOperand(I);

      if (MI.isBundle()) {
          if (MO.isReg() && MO.getReg() == DefReg) {
            assert(MO.getSubReg() == 0 &&
                   "tied sub-registers in bundles currently not supported");
            MI.removeOperand(MO.getOperandNo());

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

      if (MI.isBundle()) {
          if (MIOp.isReg() && MIOp.getReg() == DefReg) {
            MIOp.setIsUndef(true);
            MIOp.setReg(DummyReg);

  return MI.isBundle() ? &MI : NewMI;
                                                  ThreeAddressUpdates &U) const {
  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

    for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "

  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
    int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                             AMDGPU::OpName::src0);
    const MachineOperand *Src0 = &MI.getOperand(Src0Idx);

  MachineInstrBuilder MIB;

  const MachineOperand *Src0Mods =
  const MachineOperand *Src1Mods =
  const MachineOperand *Src2Mods =

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
      (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
      (ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
    MachineInstr *DefMI;

        MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),

  if (Src0Literal && !ST.hasVOP3Literal())
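// Scheduling/hazard related queries: instructions that write EXEC, toggle
// GPR-index mode, or adjust message/priority state are treated as scheduling
// boundaries here.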
4432 switch (
MI.getOpcode()) {
4433 case AMDGPU::S_SET_GPR_IDX_ON:
4434 case AMDGPU::S_SET_GPR_IDX_MODE:
4435 case AMDGPU::S_SET_GPR_IDX_OFF:
4453 if (
MI.isTerminator() ||
MI.isPosition())
4457 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4460 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4466 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4467 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4468 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4469 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4470 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4475 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4476 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4477          Opcode == AMDGPU::DS_SUB_GS_REG_RTN || isGWS(Opcode);
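// These DS opcodes (ordered count, GS register add/sub, and the GWS family)
// always address GDS rather than LDS.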
4491   if (MI.getMF()->getFunction().hasFnAttribute("amdgpu-no-flat-scratch-init"))
4496   if (MI.memoperands_empty())
4501 unsigned AS = Memop->getAddrSpace();
4502 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4503 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4504 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4505 *MD, AMDGPUAS::PRIVATE_ADDRESS);
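  // A FLAT access may still reach private (scratch) memory unless
  // !noalias.addrspace metadata proves the private address space is excluded.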
4520   if (MI.memoperands_empty())
4529 unsigned AS = Memop->getAddrSpace();
4546 if (ST.isTgSplitEnabled())
4551   if (MI.memoperands_empty())
4556 unsigned AS = Memop->getAddrSpace();
4572   unsigned Opcode = MI.getOpcode();
4587 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4588 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4589 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
4592   if (MI.isCall() || MI.isInlineAsm())
4608 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4609 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4610 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4611 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4619   if (MI.isMetaInstruction())
4623   if (MI.isCopyLike()) {
4624     if (!RI.isSGPRReg(MRI, MI.getOperand(0).getReg()))
4628     return MI.readsRegister(AMDGPU::EXEC, &RI);
4639   return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
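  // A copy to an SGPR only depends on EXEC if it reads it explicitly; any
  // non-SALU instruction is conservatively assumed to read the exec mask.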
4643 switch (Imm.getBitWidth()) {
4649 ST.hasInv2PiInlineImm());
4652 ST.hasInv2PiInlineImm());
4654 return ST.has16BitInsts() &&
4656 ST.hasInv2PiInlineImm());
4663 APInt IntImm = Imm.bitcastToAPInt();
4665 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4673 return ST.has16BitInsts() &&
4676 return ST.has16BitInsts() &&
4686 switch (OperandType) {
4696     int32_t Trunc = static_cast<int32_t>(Imm);
4738     int16_t Trunc = static_cast<int16_t>(Imm);
4739     return ST.has16BitInsts() &&
4748     int16_t Trunc = static_cast<int16_t>(Imm);
4749 return ST.has16BitInsts() &&
4800 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4806 return ST.hasVOP3Literal();
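  // A literal is only acceptable here if this operand class can take one and
  // the subtarget supports literals in VOP3 encodings.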
4810                                        int64_t ImmVal) const {
4813   if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4814       OpNo == (unsigned)AMDGPU::getNamedOperandIdx(InstDesc.getOpcode(),
4815                                                    AMDGPU::OpName::src2))
4817 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4829 "unexpected imm-like operand kind");
4842 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4860                                    AMDGPU::OpName OpName) const {
4862   return Mods && Mods->getImm();
4875   switch (MI.getOpcode()) {
4876   default: return false;
4878 case AMDGPU::V_ADDC_U32_e64:
4879 case AMDGPU::V_SUBB_U32_e64:
4880 case AMDGPU::V_SUBBREV_U32_e64: {
4888 case AMDGPU::V_MAC_F16_e64:
4889 case AMDGPU::V_MAC_F32_e64:
4890 case AMDGPU::V_MAC_LEGACY_F32_e64:
4891 case AMDGPU::V_FMAC_F16_e64:
4892 case AMDGPU::V_FMAC_F16_t16_e64:
4893 case AMDGPU::V_FMAC_F16_fake16_e64:
4894 case AMDGPU::V_FMAC_F32_e64:
4895 case AMDGPU::V_FMAC_F64_e64:
4896 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4902 case AMDGPU::V_CNDMASK_B32_e64:
4908     if (Src1 && (!Src1->isReg() || !RI.isVGPR(MRI, Src1->getReg()) ||
4938           (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {
4947                                            unsigned Op32) const {
4961       Inst32.add(MI.getOperand(I));
4965   int Idx = MI.getNumExplicitDefs();
4967     int OpTy = MI.getDesc().operands()[Idx++].OperandType;
4972 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
4994 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5002 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5005 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5006 AMDGPU::SReg_64RegClass.contains(Reg);
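// The helpers above recognize the special physical scalar registers
// (SGPR_NULL, VCC, M0) and the general 32/64-bit SGPR classes.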
5012   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
5024   return Reg.isVirtual() ? RI.isSGPRClass(MRI.getRegClass(Reg))
5034 switch (MO.getReg()) {
5036 case AMDGPU::VCC_LO:
5037 case AMDGPU::VCC_HI:
5039 case AMDGPU::FLAT_SCR:
5052   switch (MI.getOpcode()) {
5053 case AMDGPU::V_READLANE_B32:
5054 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5055 case AMDGPU::V_WRITELANE_B32:
5056 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5063   if (MI.isPreISelOpcode() ||
5064       SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||
5079   if (SubReg.getReg().isPhysical())
5082   return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
5093   if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5094     ErrInfo = "illegal copy from vector register to SGPR";
5112   if (!MRI.isSSA() && MI.isCopy())
5113     return verifyCopy(MI, MRI, ErrInfo);
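  // Machine verifier entry point: the checks below validate operand counts,
  // register classes, constant-bus usage, SDWA/DPP restrictions and other
  // encoding constraints for each SI instruction.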
5115 if (SIInstrInfo::isGenericOpcode(Opcode))
5118 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5119 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5120 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5122 if (Src0Idx == -1) {
5124 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5125 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5126 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5127 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
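    // No plain src0: this is a VOPD (dual-issue) instruction, so fall back to
    // the X/Y component operand names.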
5132   if (!Desc.isVariadic() &&
5133       Desc.getNumOperands() != MI.getNumExplicitOperands()) {
5134     ErrInfo = "Instruction has wrong number of operands.";
5138   if (MI.isInlineAsm()) {
5151       if (!Reg.isVirtual() && !RC->contains(Reg)) {
5152         ErrInfo = "inlineasm operand has incorrect register class.";
5160   if (isImage(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
5161     ErrInfo = "missing memory operand from image instruction.";
5166   for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
5169       ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
5170                 "all fp values to integers.";
5175 int16_t RegClass = getOpRegClassID(OpInfo);
5177 switch (OpInfo.OperandType) {
5179       if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
5180         ErrInfo = "Illegal immediate value for operand.";
5215           ErrInfo = "Illegal immediate value for operand.";
5222           ErrInfo = "Expected inline constant for operand.";
5237       if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
5238         ErrInfo = "Expected immediate, but got non-immediate";
5247 if (OpInfo.isGenericType())
5262 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
5264       if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
5266                 RI.getSubRegisterClass(RC, MO.getSubReg())) {
5267           RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
5274       if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5275         ErrInfo = "Subtarget requires even aligned vector registers";
5280 if (RegClass != -1) {
5281 if (Reg.isVirtual())
5286         ErrInfo = "Operand has incorrect register class.";
5294     if (!ST.hasSDWA()) {
5295       ErrInfo = "SDWA is not supported on this target";
5299     for (auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5300                     AMDGPU::OpName::dst_sel}) {
5304         int64_t Imm = MO->getImm();
5306           ErrInfo = "Invalid SDWA selection";
5311     int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5313     for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5318       if (!ST.hasSDWAScalar()) {
5320         if (!MO.isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.getReg()))) {
5321           ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
5328               "Only reg allowed as operands in SDWA instructions on GFX9+";
5334     if (!ST.hasSDWAOmod()) {
5337       if (OMod != nullptr &&
5339         ErrInfo = "OMod not allowed in SDWA instructions on VI";
5344 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5345 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5346 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5347 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5350       unsigned Mods = Src0ModsMO->getImm();
5353         ErrInfo = "sext, abs and neg are not allowed on this instruction";
5359     if (isVOPC(BasicOpcode)) {
5360       if (!ST.hasSDWASdst() && DstIdx != -1) {
5363         if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5364           ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";
5367       } else if (!ST.hasSDWAOutModsVOPC()) {
5370         if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
5371           ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";
5377         if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
5378           ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";
5385     if (DstUnused && DstUnused->isImm() &&
5388       if (!Dst.isReg() || !Dst.isTied()) {
5389         ErrInfo = "Dst register should have tied register";
5394           MI.getOperand(MI.findTiedOperandIdx(DstIdx));
5397             "Dst register should be tied to implicit use of preserved register";
5401         ErrInfo = "Dst register should use same physical register as preserved";
5408   if (isImage(Opcode) && !MI.mayStore()) {
5420 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5428 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5432 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5433       if (RegCount > DstSize) {
5434         ErrInfo = "Image instruction returns too many registers for dst "
5443   if (isVALU(MI) && Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
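    // VALU constant-bus verification: count SGPR and literal operands and
    // check them against the subtarget's constant bus limit.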
5444 unsigned ConstantBusCount = 0;
5445     bool UsesLiteral = false;
5448     int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5452       LiteralVal = &MI.getOperand(ImmIdx);
5461     for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5472           } else if (!MO.isFI()) {
5479               ErrInfo = "VOP2/VOP3 instruction uses more than one literal";
5489           if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
5490                 return !RI.regsOverlap(SGPRUsed, SGPR);
5499 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5500 Opcode != AMDGPU::V_WRITELANE_B32) {
5501       ErrInfo = "VOP* instruction violates constant bus restriction";
5505     if (isVOP3(MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5506       ErrInfo = "VOP3 instruction uses literal";
5513   if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5514     unsigned SGPRCount = 0;
5517     for (int OpIdx : {Src0Idx, Src1Idx}) {
5525         if (MO.getReg() != SGPRUsed)
5530     if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5531       ErrInfo = "WRITELANE instruction violates constant bus restriction";
5538   if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5539       Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5546       ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
5556       ErrInfo = "ABS not allowed in VOP3B instructions";
5569       ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
5576     if (Desc.isBranch()) {
5578         ErrInfo = "invalid branch target for SOPK instruction";
5585         ErrInfo = "invalid immediate for SOPK instruction";
5590         ErrInfo = "invalid immediate for SOPK instruction";
5597   if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5598       Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5599       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5600       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5601     const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5602                        Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5604     const unsigned StaticNumOps =
5605         Desc.getNumOperands() + Desc.implicit_uses().size();
5606     const unsigned NumImplicitOps = IsDst ? 2 : 1;
5612     if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5613       ErrInfo = "missing implicit register operands";
5619       if (!Dst->isUse()) {
5620         ErrInfo = "v_movreld_b32 vdst should be a use operand";
5625       if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5626           UseOpIdx != StaticNumOps + 1) {
5627         ErrInfo = "movrel implicit operands should be tied";
5634         = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5636         !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5637       ErrInfo = "src0 should be subreg of implicit vector use";
5645   if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5646     ErrInfo = "VALU instruction does not implicitly read exec mask";
5652   if (MI.mayStore() &&
5657     if (Soff && Soff->getReg() != AMDGPU::M0) {
5658       ErrInfo = "scalar stores must use m0 as offset register";
5664   if (isFLAT(MI) && !ST.hasFlatInstOffsets()) {
5666     if (Offset->getImm() != 0) {
5667       ErrInfo = "subtarget does not support offsets in flat instructions";
5672   if (isDS(MI) && !ST.hasGDS()) {
5674     if (GDSOp && GDSOp->getImm() != 0) {
5675       ErrInfo = "GDS is not supported on this subtarget";
5683 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5684 AMDGPU::OpName::vaddr0);
5685     AMDGPU::OpName RSrcOpName =
5686         isMIMG(MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5687     int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5695         ErrInfo = "dim is out of range";
5700       if (ST.hasR128A16()) {
5702         IsA16 = R128A16->getImm() != 0;
5703       } else if (ST.hasA16()) {
5705         IsA16 = A16->getImm() != 0;
5708       bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5710       unsigned AddrWords =
5713       unsigned VAddrWords;
5715         VAddrWords = RsrcIdx - VAddr0Idx;
5716         if (ST.hasPartialNSAEncoding() &&
5718           unsigned LastVAddrIdx = RsrcIdx - 1;
5719           VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
5727       if (VAddrWords != AddrWords) {
5729                           << " but got " << VAddrWords << "\n");
5730         ErrInfo = "bad vaddr size";
5740     unsigned DC = DppCt->getImm();
5741 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5742 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5743 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5744 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5745 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5746 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5747 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5748       ErrInfo = "Invalid dpp_ctrl value";
5751     if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5753       ErrInfo = "Invalid dpp_ctrl value: "
5754                 "wavefront shifts are not supported on GFX10+";
5757     if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5759       ErrInfo = "Invalid dpp_ctrl value: "
5760                 "broadcasts are not supported on GFX10+";
5763 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5765 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5766 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5767 !ST.hasGFX90AInsts()) {
5768         ErrInfo = "Invalid dpp_ctrl value: "
5769                   "row_newbroadcast/row_share is not supported before "
5773       if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5774         ErrInfo = "Invalid dpp_ctrl value: "
5775                   "row_share and row_xmask are not supported before GFX10";
5780     if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5783       ErrInfo = "Invalid dpp_ctrl value: "
5784                 "DP ALU dpp only support row_newbcast";
5791 AMDGPU::OpName DataName =
5792 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5798 if (ST.hasGFX90AInsts()) {
5799       if (Dst && Data && !Dst->isTied() && !Data->isTied() &&
5800           (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI, Data->getReg()))) {
5801         ErrInfo = "Invalid register class: "
5802                   "vdata and vdst should be both VGPR or AGPR";
5805       if (Data && Data2 &&
5807         ErrInfo = "Invalid register class: "
5808                   "both data operands should be VGPR or AGPR";
5812       if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5814           (Data2 && RI.isAGPR(MRI, Data2->getReg()))) {
5815         ErrInfo = "Invalid register class: "
5816                   "agpr loads and stores not supported on this GPU";
5822 if (ST.needsAlignedVGPRs()) {
5823     const auto isAlignedReg = [&MI, &MRI, this](AMDGPU::OpName OpName) -> bool {
5828       if (Reg.isPhysical())
5829         return !(RI.getHWRegIndex(Reg) & 1);
5831       return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5832              !(RI.getChannelFromSubReg(Op->getSubReg()) & 1);
5835     if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5836         Opcode == AMDGPU::DS_GWS_BARRIER) {
5838       if (!isAlignedReg(AMDGPU::OpName::data0)) {
5839         ErrInfo = "Subtarget requires even aligned vector registers "
5840                   "for DS_GWS instructions";
5846       if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5847         ErrInfo = "Subtarget requires even aligned vector registers "
5848                   "for vaddr operand of image instructions";
5854 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5856     if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5857       ErrInfo = "Invalid register class: "
5858                 "v_accvgpr_write with an SGPR is not supported on this GPU";
5863   if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5866       ErrInfo = "pseudo expects only physical SGPRs";
5873     if (!ST.hasScaleOffset()) {
5874       ErrInfo = "Subtarget does not support offset scaling";
5878       ErrInfo = "Instruction does not support offset scaling";
5887   for (unsigned I = 0; I < 3; ++I) {
5893   if (ST.hasFlatScratchHiInB64InstHazard() && isSALU(MI) &&
5894       MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, nullptr)) {
5896     if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5897                     &AMDGPU::SReg_64RegClass) ||
5898         Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5899       ErrInfo = "Instruction cannot read flat_scratch_base_hi";
5911   switch (MI.getOpcode()) {
5912   default: return AMDGPU::INSTRUCTION_LIST_END;
5913   case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
5914   case AMDGPU::COPY: return AMDGPU::COPY;
5915   case AMDGPU::PHI: return AMDGPU::PHI;
5916   case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
5917   case AMDGPU::WQM: return AMDGPU::WQM;
5918   case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
5919   case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
5920   case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
5921   case AMDGPU::S_MOV_B32: {
5923     return MI.getOperand(1).isReg() ||
5924            RI.isAGPR(MRI, MI.getOperand(0).getReg()) ?
5925            AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5927 case AMDGPU::S_ADD_I32:
5928 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
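  // getVALUOp: each scalar (SALU) opcode maps to the VALU opcode used when the
  // instruction has to be moved off the scalar unit; add/sub select the
  // no-carry forms when the subtarget provides them.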
5929 case AMDGPU::S_ADDC_U32:
5930 return AMDGPU::V_ADDC_U32_e32;
5931 case AMDGPU::S_SUB_I32:
5932 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5935 case AMDGPU::S_ADD_U32:
5936 return AMDGPU::V_ADD_CO_U32_e32;
5937 case AMDGPU::S_SUB_U32:
5938 return AMDGPU::V_SUB_CO_U32_e32;
5939 case AMDGPU::S_ADD_U64_PSEUDO:
5940 return AMDGPU::V_ADD_U64_PSEUDO;
5941 case AMDGPU::S_SUB_U64_PSEUDO:
5942 return AMDGPU::V_SUB_U64_PSEUDO;
5943 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5944 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5945 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5946 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5947 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5948 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5949 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5950 case AMDGPU::S_XNOR_B32:
5951 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5952 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5953 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5954 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5955 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
5956 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
5957 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
5958 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
5959 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
5960 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
5961 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
5962 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
5963 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
5964 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
5965 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
5966 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
5967 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
5968 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
5969 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
5970 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
5971 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
5972 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
5973 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
5974 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
5975 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
5976 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
5977 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
5978 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
5979 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
5980 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
5981 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
5982 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
5983 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
5984 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
5985 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
5986 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
5987 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
5988 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
5989 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
5990 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
5991 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
5992 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
5993 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
5994 case AMDGPU::S_CVT_F32_F16:
5995 case AMDGPU::S_CVT_HI_F32_F16:
5996 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
5997 : AMDGPU::V_CVT_F32_F16_fake16_e64;
5998 case AMDGPU::S_CVT_F16_F32:
5999 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6000 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6001 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6002 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6003 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6004 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6005 case AMDGPU::S_CEIL_F16:
6006 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6007 : AMDGPU::V_CEIL_F16_fake16_e64;
6008 case AMDGPU::S_FLOOR_F16:
6009 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6010 : AMDGPU::V_FLOOR_F16_fake16_e64;
6011 case AMDGPU::S_TRUNC_F16:
6012 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6013 : AMDGPU::V_TRUNC_F16_fake16_e64;
6014 case AMDGPU::S_RNDNE_F16:
6015 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6016 : AMDGPU::V_RNDNE_F16_fake16_e64;
6017 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6018 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6019 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6020 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6021 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6022 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6023 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6024 case AMDGPU::S_ADD_F16:
6025 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6026 : AMDGPU::V_ADD_F16_fake16_e64;
6027 case AMDGPU::S_SUB_F16:
6028 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6029 : AMDGPU::V_SUB_F16_fake16_e64;
6030 case AMDGPU::S_MIN_F16:
6031 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6032 : AMDGPU::V_MIN_F16_fake16_e64;
6033 case AMDGPU::S_MAX_F16:
6034 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6035 : AMDGPU::V_MAX_F16_fake16_e64;
6036 case AMDGPU::S_MINIMUM_F16:
6037 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6038 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6039 case AMDGPU::S_MAXIMUM_F16:
6040 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6041 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6042 case AMDGPU::S_MUL_F16:
6043 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6044 : AMDGPU::V_MUL_F16_fake16_e64;
6045 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6046 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6047 case AMDGPU::S_FMAC_F16:
6048 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6049 : AMDGPU::V_FMAC_F16_fake16_e64;
6050 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6051 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6052 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6053 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6054 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6055 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6056 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6057 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6058 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6059 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6060 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6061 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6062 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6063 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6064 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6065 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6066 case AMDGPU::S_CMP_LT_F16:
6067 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6068 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6069 case AMDGPU::S_CMP_EQ_F16:
6070 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6071 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6072 case AMDGPU::S_CMP_LE_F16:
6073 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6074 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6075 case AMDGPU::S_CMP_GT_F16:
6076 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6077 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6078 case AMDGPU::S_CMP_LG_F16:
6079 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6080 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6081 case AMDGPU::S_CMP_GE_F16:
6082 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6083 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6084 case AMDGPU::S_CMP_O_F16:
6085 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6086 : AMDGPU::V_CMP_O_F16_fake16_e64;
6087 case AMDGPU::S_CMP_U_F16:
6088 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6089 : AMDGPU::V_CMP_U_F16_fake16_e64;
6090 case AMDGPU::S_CMP_NGE_F16:
6091 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6092 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6093 case AMDGPU::S_CMP_NLG_F16:
6094 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6095 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6096 case AMDGPU::S_CMP_NGT_F16:
6097 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6098 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6099 case AMDGPU::S_CMP_NLE_F16:
6100 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6101 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6102 case AMDGPU::S_CMP_NEQ_F16:
6103 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6104 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6105 case AMDGPU::S_CMP_NLT_F16:
6106 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6107 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6108 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6109 case AMDGPU::V_S_EXP_F16_e64:
6110 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6111 : AMDGPU::V_EXP_F16_fake16_e64;
6112 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6113 case AMDGPU::V_S_LOG_F16_e64:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6115 : AMDGPU::V_LOG_F16_fake16_e64;
6116 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6117 case AMDGPU::V_S_RCP_F16_e64:
6118 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6119 : AMDGPU::V_RCP_F16_fake16_e64;
6120 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6121 case AMDGPU::V_S_RSQ_F16_e64:
6122 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6123 : AMDGPU::V_RSQ_F16_fake16_e64;
6124 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6125 case AMDGPU::V_S_SQRT_F16_e64:
6126 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6127 : AMDGPU::V_SQRT_F16_fake16_e64;
6130 "Unexpected scalar opcode without corresponding vector one!");
6179 "Not a whole wave func");
6182     if (MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6183         MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6190                                                       unsigned OpNo) const {
6192   if (MI.isVariadic() || OpNo >= Desc.getNumOperands() ||
6193       Desc.operands()[OpNo].RegClass == -1) {
6196     if (Reg.isVirtual()) {
6198       return MRI.getRegClass(Reg);
6200     return RI.getPhysRegBaseClass(Reg);
6203   int16_t RegClass = getOpRegClassID(Desc.operands()[OpNo]);
6204 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6212   unsigned RCID = getOpRegClassID(get(MI.getOpcode()).operands()[OpIdx]);
6214   unsigned Size = RI.getRegSizeInBits(*RC);
6215   unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6216                     : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6217                                  : AMDGPU::V_MOV_B32_e32;
6219     Opcode = AMDGPU::COPY;
6220   else if (RI.isSGPRClass(RC))
6221     Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
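  // Pick a move opcode matching the operand's register class and width so the
  // illegal operand can be materialized into a register of the right kind.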
6235     return RI.getSubReg(SuperReg.getReg(), SubIdx);
6241   unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx);
6252   if (SubIdx == AMDGPU::sub0)
6254   if (SubIdx == AMDGPU::sub1)
6266 void SIInstrInfo::swapOperands(MachineInstr &Inst) const {
6282   if (Reg.isPhysical())
6292     return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr;
6295   return RI.getCommonSubClass(DRC, RC) != nullptr;
6302   unsigned Opc = MI.getOpcode();
6308   constexpr AMDGPU::OpName OpNames[] = {
6309       AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6312     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[I]);
6313     if (static_cast<unsigned>(SrcIdx) == OpIdx &&
6323   bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
6324   if (IsAGPR && !ST.hasMAIInsts())
6326   if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
6330     const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
6331     const int DataIdx = AMDGPU::getNamedOperandIdx(
6332         Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6333     if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
6334         MI.getOperand(DataIdx).isReg() &&
6335         RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
6337     if ((int)OpIdx == DataIdx) {
6338       if (VDstIdx != -1 &&
6339           RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6342       const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
6343       if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
6344           RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6349   if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6350       (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
6354   if (ST.hasFlatScratchHiInB64InstHazard() &&
6361     if (Opc == AMDGPU::S_BITCMP0_B64 || Opc == AMDGPU::S_BITCMP1_B64)
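  // The checks above reject AGPR operands where the instruction (or its paired
  // vdst/vdata operand) requires a plain VGPR on this subtarget.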
6382 constexpr unsigned NumOps = 3;
6383   constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
6384       AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6385       AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6386       AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6391     int SrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[SrcN]);
6394     MO = &MI.getOperand(SrcIdx);
6401         AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpNames[NumOps + SrcN]);
6405     unsigned Mods = MI.getOperand(ModsIdx).getImm();
6409 return !OpSel && !OpSelHi;
6418 int64_t RegClass = getOpRegClassID(OpInfo);
6420       RegClass != -1 ? RI.getRegClass(RegClass) : nullptr;
6429   int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
6430   int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
6434     if (!LiteralLimit--)
6444   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6452         if (--ConstantBusLimit <= 0)
6464         if (!LiteralLimit--)
6466         if (--ConstantBusLimit <= 0)
6472   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
6476     if (!Op.isReg() && !Op.isFI() && !Op.isRegMask() &&
6478         !Op.isIdenticalTo(*MO))
6488   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6502   bool Is64BitOp = Is64BitFPOp ||
6509       (!ST.has64BitLiterals() || InstDesc.getSize() != 4))
6518 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6536 bool IsGFX950Only = ST.hasGFX950Insts();
6537 bool IsGFX940Only = ST.hasGFX940Insts();
6539 if (!IsGFX950Only && !IsGFX940Only)
6557   unsigned Opcode = MI.getOpcode();
6559 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6560 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6561 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6562 case AMDGPU::V_MQSAD_U32_U8_e64:
6563 case AMDGPU::V_PK_ADD_F16:
6564 case AMDGPU::V_PK_ADD_F32:
6565 case AMDGPU::V_PK_ADD_I16:
6566 case AMDGPU::V_PK_ADD_U16:
6567 case AMDGPU::V_PK_ASHRREV_I16:
6568 case AMDGPU::V_PK_FMA_F16:
6569 case AMDGPU::V_PK_FMA_F32:
6570 case AMDGPU::V_PK_FMAC_F16_e32:
6571 case AMDGPU::V_PK_FMAC_F16_e64:
6572 case AMDGPU::V_PK_LSHLREV_B16:
6573 case AMDGPU::V_PK_LSHRREV_B16:
6574 case AMDGPU::V_PK_MAD_I16:
6575 case AMDGPU::V_PK_MAD_U16:
6576 case AMDGPU::V_PK_MAX_F16:
6577 case AMDGPU::V_PK_MAX_I16:
6578 case AMDGPU::V_PK_MAX_U16:
6579 case AMDGPU::V_PK_MIN_F16:
6580 case AMDGPU::V_PK_MIN_I16:
6581 case AMDGPU::V_PK_MIN_U16:
6582 case AMDGPU::V_PK_MOV_B32:
6583 case AMDGPU::V_PK_MUL_F16:
6584 case AMDGPU::V_PK_MUL_F32:
6585 case AMDGPU::V_PK_MUL_LO_U16:
6586 case AMDGPU::V_PK_SUB_I16:
6587 case AMDGPU::V_PK_SUB_U16:
6588 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6597   unsigned Opc = MI.getOpcode();
6600   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
6603   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
6609   if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1 && Src0.isReg() &&
6616   if (Opc == AMDGPU::V_WRITELANE_B32) {
6619       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6625       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6635   if (Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F16_e32) {
6636     int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
6637     if (!RI.isVGPR(MRI, MI.getOperand(Src2Idx).getReg()))
6649   if (Opc == AMDGPU::V_READLANE_B32 && Src1.isReg() &&
6651     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6663   if (HasImplicitSGPR || !MI.isCommutable()) {
6680   if (CommutedOpc == -1) {
6685   MI.setDesc(get(CommutedOpc));
6689   bool Src0Kill = Src0.isKill();
6693   else if (Src1.isReg()) {
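  // legalizeOperandsVOP2: when src1 holds the SGPR or literal, commute the
  // instruction so it lands in src0, the only VOP2 source position that
  // accepts it; otherwise the value is copied into a VGPR first.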
6708   unsigned Opc = MI.getOpcode();
6711       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
6712       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1),
6713       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)
6716   if (Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6717       Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6718       Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6719       Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6720       Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6721       Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6722       Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6726     if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) {
6727       Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6732     if (VOP3Idx[2] != -1) {
6734       if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) {
6735         Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
6744   int ConstantBusLimit = ST.getConstantBusLimit(Opc);
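  // The permlane lane-select sources must be SGPRs or immediates, so non-SGPR
  // inputs are read back into scalar registers before the VOP3 check below.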
6745 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6747 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6749     SGPRsUsed.insert(SGPRReg);
6753   for (int Idx : VOP3Idx) {
6762     if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6774     if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
6781     if (ConstantBusLimit > 0) {
6793   if ((Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e64) &&
6794       !RI.isVGPR(MRI, MI.getOperand(VOP3Idx[2]).getReg()))
6800   for (unsigned I = 0; I < 3; ++I) {
6813   SRC = RI.getCommonSubClass(SRC, DstRC);
6816   unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6818   if (RI.hasAGPRs(VRC)) {
6819     VRC = RI.getEquivalentVGPRClass(VRC);
6820     Register NewSrcReg = MRI.createVirtualRegister(VRC);
6822             get(TargetOpcode::COPY), NewSrcReg)
6829             get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6835   for (unsigned i = 0; i < SubRegs; ++i) {
6836     Register SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
6838             get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6839         .addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6845                                 get(AMDGPU::REG_SEQUENCE), DstReg);
6846   for (unsigned i = 0; i < SubRegs; ++i) {
6848     MIB.addImm(RI.getSubRegFromChannel(i));
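  // Each 32-bit channel of the VGPR source is read back with
  // V_READFIRSTLANE_B32 and the pieces are reassembled into an SGPR tuple with
  // REG_SEQUENCE.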
6861   if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) {
6863     SBase->setReg(SGPR);
6866   if (SOff && !RI.isSGPRReg(MRI, SOff->getReg())) {
6874   int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
6875   if (OldSAddrIdx < 0)
6891   int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6892   if (NewVAddrIdx < 0)
6895   int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
6899   if (OldVAddrIdx >= 0) {
6901     VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
6913 if (OldVAddrIdx == NewVAddrIdx) {
6916 MRI.removeRegOperandFromUseList(&NewVAddr);
6917 MRI.moveOperands(&NewVAddr, &SAddr, 1);
6921 MRI.removeRegOperandFromUseList(&NewVAddr);
6922 MRI.addRegOperandToUseList(&NewVAddr);
6924 assert(OldSAddrIdx == NewVAddrIdx);
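  // Rewriting a FLAT/GLOBAL saddr form into the vaddr form: the scalar address
  // operand is moved into the vaddr slot of the equivalent VGPR-addressed
  // opcode so the operands become legal.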
6926 if (OldVAddrIdx >= 0) {
6927 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6928 AMDGPU::OpName::vdst_in);
6932 if (NewVDstIn != -1) {
6933       int OldVDstIn = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
6939     if (NewVDstIn != -1) {
6940       int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
6961   if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
6981 unsigned OpSubReg =
Op.getSubReg();
6984 RI.getRegClassForReg(
MRI, OpReg), OpSubReg);
6990 Register DstReg =
MRI.createVirtualRegister(DstRC);
7000 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7003 bool ImpDef = Def->isImplicitDef();
7004 while (!ImpDef && Def && Def->isCopy()) {
7005 if (Def->getOperand(1).getReg().isPhysical())
7007 Def =
MRI.getUniqueVRegDef(Def->getOperand(1).getReg());
7008 ImpDef = Def && Def->isImplicitDef();
7010 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7029   const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
7035   unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7036   unsigned NumSubRegs = RegSize / 32;
7037 Register VScalarOp = ScalarOp->getReg();
7039 if (NumSubRegs == 1) {
7040     Register CurReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7042     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7045     Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
7047     BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7053       CondReg = NewCondReg;
7055       Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
7063 ScalarOp->setReg(CurReg);
7064 ScalarOp->setIsKill();
7068 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7069 "Unhandled register size");
7071     for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7073           MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7075           MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7078       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7079           .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
7082       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7083           .addReg(VScalarOp, VScalarOpUndef,
7084                   TRI->getSubRegFromChannel(Idx + 1));
7090       Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
7091       BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7097       Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
7098       auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7101       if (NumSubRegs <= 2)
7102         Cmp.addReg(VScalarOp);
7104         Cmp.addReg(VScalarOp, VScalarOpUndef,
7105                    TRI->getSubRegFromChannel(Idx, 2));
7109         CondReg = NewCondReg;
7111         Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
7119     const auto *SScalarOpRC =
7120         TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
7121     Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
7125         BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7126     unsigned Channel = 0;
7127     for (Register Piece : ReadlanePieces) {
7128       Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
7132     ScalarOp->setReg(SScalarOp);
7133     ScalarOp->setIsKill();
7137   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7138 MRI.setSimpleHint(SaveExec, CondReg);
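  // Waterfall loop body: read the possibly divergent scalar operand back one
  // lane at a time with V_READFIRSTLANE, compare it against the original VGPR
  // value, and accumulate the matching lanes into the exec mask for this
  // iteration.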
7169 if (!Begin.isValid())
7171 if (!End.isValid()) {
7177   const auto *BoolXExecRC = TRI->getWaveMaskRegClass();
7185       MBB.computeRegisterLiveness(TRI, AMDGPU::SCC, MI,
7186                                   std::numeric_limits<unsigned>::max()) !=
7189     SaveSCCReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
7195   Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
7204   for (auto I = Begin; I != AfterMI; I++) {
7205     for (auto &MO : I->all_uses())
7206       MRI.clearKillFlags(MO.getReg());
7231   MBB.addSuccessor(LoopBB);
7241   for (auto &Succ : RemainderBB->successors()) {
7265static std::tuple<unsigned, unsigned>
7273       TII.buildExtractSubReg(MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7274                              AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7277   Register Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
7278   Register SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7279   Register SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
7280   Register NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SGPR_128RegClass);
7281   uint64_t RsrcDataFormat = TII.getDefaultRsrcDataFormat();
7298       .addImm(AMDGPU::sub0_sub1)
7304 return std::tuple(RsrcPtr, NewSRsrc);
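// The helper above splits the 128-bit resource descriptor into its 64-bit base
// pointer and a replacement rsrc whose pointer field is zero and whose upper
// dwords hold the default buffer data format.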
7341   if (MI.getOpcode() == AMDGPU::PHI) {
7343     assert(!RI.isSGPRClass(VRC));
7346     for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7348       if (!Op.isReg() || !Op.getReg().isVirtual())
7364   if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7367     if (RI.hasVGPRs(DstRC)) {
7371       for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
7373         if (!Op.isReg() || !Op.getReg().isVirtual())
7391   if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7396     if (DstRC != Src0RC) {
7405   if (MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7407     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7413   if (MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7414       MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7415       MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7416       MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7417       MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7418       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7419       MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7421     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7434                                   ? AMDGPU::OpName::rsrc
7435                                   : AMDGPU::OpName::srsrc;
7437     if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
7440     AMDGPU::OpName SampOpName =
7441         isMIMG(MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7443     if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
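  // Image resource and sampler operands must live in SGPRs; if they do not,
  // they are loaded through a waterfall loop.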
7450   if (MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7452     if (!RI.isSGPRClass(MRI.getRegClass(Dest->getReg()))) {
7456       unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7457       unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7462       while (Start->getOpcode() != FrameSetupOpcode)
7465       while (End->getOpcode() != FrameDestroyOpcode)
7469       while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7470              MI.definesRegister(End->getOperand(1).getReg(), nullptr))
7478   if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7480     Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
7482         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
7492   if (MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS ||
7493       MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_D2 ||
7494       MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS ||
7495       MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_D2) {
7497     if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
7504   bool isSoffsetLegal = true;
7506       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
7507   if (SoffsetIdx != -1) {
7510         !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
7511       isSoffsetLegal = false;
7515   bool isRsrcLegal = true;
7517       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
7518   if (RsrcIdx != -1) {
7521       isRsrcLegal = false;
7525 if (isRsrcLegal && isSoffsetLegal)
7549     Register NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7550     Register NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7551     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7553     const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7554     Register CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
7555     Register CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
7557     unsigned RsrcPtr, NewSRsrc;
7564         .addReg(RsrcPtr, {}, AMDGPU::sub0)
7565         .addReg(VAddr->getReg(), {}, AMDGPU::sub0)
7571         .addReg(RsrcPtr, {}, AMDGPU::sub1)
7572         .addReg(VAddr->getReg(), {}, AMDGPU::sub1)
7585   } else if (!VAddr && ST.hasAddr64()) {
7589            "FIXME: Need to emit flat atomics here");
7591     unsigned RsrcPtr, NewSRsrc;
7594     Register NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
7617       MIB.addImm(CPol->getImm());
7622       MIB.addImm(TFE->getImm());
7642     MI.removeFromParent();
7647         .addReg(RsrcPtr, {}, AMDGPU::sub0)
7648         .addImm(AMDGPU::sub0)
7649         .addReg(RsrcPtr, {}, AMDGPU::sub1)
7650         .addImm(AMDGPU::sub1);
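    // On subtargets with ADDR64 buffer instructions, the illegal rsrc is
    // handled by adding its extracted base pointer into a new 64-bit vaddr and
    // using a zeroed-pointer rsrc; otherwise a waterfall loop is required.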
7653 if (!isSoffsetLegal) {
7665 if (!isSoffsetLegal) {
7677       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
7678   if (RsrcIdx != -1) {
7679     DeferredList.insert(MI);
7684   return DeferredList.contains(MI);
7694   if (!ST.useRealTrue16Insts())
7697   unsigned Opcode = MI.getOpcode();
7701       OpIdx >= get(Opcode).getNumOperands() ||
7702       get(Opcode).operands()[OpIdx].RegClass == -1)
7706   if (!Op.isReg() || !Op.getReg().isVirtual())
7710   if (!RI.isVGPRClass(CurrRC))
7713   int16_t RCID = getOpRegClassID(get(Opcode).operands()[OpIdx]);
7715   if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7716     Op.setSubReg(AMDGPU::lo16);
7717   } else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7719     Register NewDstReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
7727     Op.setReg(NewDstReg);
7739   while (!Worklist.empty()) {
7753 "Deferred MachineInstr are not supposed to re-populate worklist");
7773 case AMDGPU::S_ADD_I32:
7774 case AMDGPU::S_SUB_I32: {
7778     std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7786 case AMDGPU::S_MUL_U64:
7787 if (ST.hasVectorMulU64()) {
7788 NewOpcode = AMDGPU::V_MUL_U64_e64;
7792 splitScalarSMulU64(Worklist, Inst, MDT);
7796 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7797 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7800 splitScalarSMulPseudo(Worklist, Inst, MDT);
7804 case AMDGPU::S_AND_B64:
7805 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7809 case AMDGPU::S_OR_B64:
7810 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7814 case AMDGPU::S_XOR_B64:
7815 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7819 case AMDGPU::S_NAND_B64:
7820 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7824 case AMDGPU::S_NOR_B64:
7825 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7829 case AMDGPU::S_XNOR_B64:
7830 if (ST.hasDLInsts())
7831 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7833 splitScalar64BitXnor(Worklist, Inst, MDT);
7837 case AMDGPU::S_ANDN2_B64:
7838 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7842 case AMDGPU::S_ORN2_B64:
7843 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7847 case AMDGPU::S_BREV_B64:
7848 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7852 case AMDGPU::S_NOT_B64:
7853 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7857 case AMDGPU::S_BCNT1_I32_B64:
7858 splitScalar64BitBCNT(Worklist, Inst);
7862 case AMDGPU::S_BFE_I64:
7863 splitScalar64BitBFE(Worklist, Inst);
7867 case AMDGPU::S_FLBIT_I32_B64:
7868 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7871 case AMDGPU::S_FF1_I32_B64:
7872 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
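    // 64-bit scalar bit operations with no single VALU equivalent are split
    // into two 32-bit VALU operations on the register halves.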
7876 case AMDGPU::S_LSHL_B32:
7877 if (ST.hasOnlyRevVALUShifts()) {
7878 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7882 case AMDGPU::S_ASHR_I32:
7883 if (ST.hasOnlyRevVALUShifts()) {
7884 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7888 case AMDGPU::S_LSHR_B32:
7889 if (ST.hasOnlyRevVALUShifts()) {
7890 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7894 case AMDGPU::S_LSHL_B64:
7895 if (ST.hasOnlyRevVALUShifts()) {
7897 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7898 : AMDGPU::V_LSHLREV_B64_e64;
7902 case AMDGPU::S_ASHR_I64:
7903 if (ST.hasOnlyRevVALUShifts()) {
7904 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7908 case AMDGPU::S_LSHR_B64:
7909 if (ST.hasOnlyRevVALUShifts()) {
7910 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7915 case AMDGPU::S_ABS_I32:
7916 lowerScalarAbs(Worklist, Inst);
7920 case AMDGPU::S_ABSDIFF_I32:
7921 lowerScalarAbsDiff(Worklist, Inst);
7925 case AMDGPU::S_CBRANCH_SCC0:
7926 case AMDGPU::S_CBRANCH_SCC1: {
7929 bool IsSCC = CondReg == AMDGPU::SCC;
7937 case AMDGPU::S_BFE_U64:
7938 case AMDGPU::S_BFM_B64:
7941 case AMDGPU::S_PACK_LL_B32_B16:
7942 case AMDGPU::S_PACK_LH_B32_B16:
7943 case AMDGPU::S_PACK_HL_B32_B16:
7944 case AMDGPU::S_PACK_HH_B32_B16:
7945     movePackToVALU(Worklist, MRI, Inst);
7949 case AMDGPU::S_XNOR_B32:
7950 lowerScalarXnor(Worklist, Inst);
7954 case AMDGPU::S_NAND_B32:
7955 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
7959 case AMDGPU::S_NOR_B32:
7960 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
7964 case AMDGPU::S_ANDN2_B32:
7965 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
7969 case AMDGPU::S_ORN2_B32:
7970 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
7978 case AMDGPU::S_ADD_CO_PSEUDO:
7979 case AMDGPU::S_SUB_CO_PSEUDO: {
7980     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
7981                        ? AMDGPU::V_ADDC_U32_e64
7982                        : AMDGPU::V_SUBB_U32_e64;
7983 const auto *CarryRC = RI.getWaveMaskRegClass();
7986     if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
7987       Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
7994     Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
8005     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8009 case AMDGPU::S_UADDO_PSEUDO:
8010 case AMDGPU::S_USUBO_PSEUDO: {
8016     unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8017                        ? AMDGPU::V_ADD_CO_U32_e64
8018                        : AMDGPU::V_SUB_CO_U32_e64;
8020         RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
8021     Register DestReg = MRI.createVirtualRegister(NewRC);
8029     MRI.replaceRegWith(Dest0.getReg(), DestReg);
8030     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8034 case AMDGPU::S_LSHL1_ADD_U32:
8035 case AMDGPU::S_LSHL2_ADD_U32:
8036 case AMDGPU::S_LSHL3_ADD_U32:
8037 case AMDGPU::S_LSHL4_ADD_U32: {
8041 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8042 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8043 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8047         RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg()));
8048     Register DestReg = MRI.createVirtualRegister(NewRC);
8056     MRI.replaceRegWith(Dest.getReg(), DestReg);
8057     addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8061 case AMDGPU::S_CSELECT_B32:
8062 case AMDGPU::S_CSELECT_B64:
8063 lowerSelect(Worklist, Inst, MDT);
8066 case AMDGPU::S_CMP_EQ_I32:
8067 case AMDGPU::S_CMP_LG_I32:
8068 case AMDGPU::S_CMP_GT_I32:
8069 case AMDGPU::S_CMP_GE_I32:
8070 case AMDGPU::S_CMP_LT_I32:
8071 case AMDGPU::S_CMP_LE_I32:
8072 case AMDGPU::S_CMP_EQ_U32:
8073 case AMDGPU::S_CMP_LG_U32:
8074 case AMDGPU::S_CMP_GT_U32:
8075 case AMDGPU::S_CMP_GE_U32:
8076 case AMDGPU::S_CMP_LT_U32:
8077 case AMDGPU::S_CMP_LE_U32:
8078 case AMDGPU::S_CMP_EQ_U64:
8079 case AMDGPU::S_CMP_LG_U64:
8080 case AMDGPU::S_CMP_LT_F32:
8081 case AMDGPU::S_CMP_EQ_F32:
8082 case AMDGPU::S_CMP_LE_F32:
8083 case AMDGPU::S_CMP_GT_F32:
8084 case AMDGPU::S_CMP_LG_F32:
8085 case AMDGPU::S_CMP_GE_F32:
8086 case AMDGPU::S_CMP_O_F32:
8087 case AMDGPU::S_CMP_U_F32:
8088 case AMDGPU::S_CMP_NGE_F32:
8089 case AMDGPU::S_CMP_NLG_F32:
8090 case AMDGPU::S_CMP_NGT_F32:
8091 case AMDGPU::S_CMP_NLE_F32:
8092 case AMDGPU::S_CMP_NEQ_F32:
8093 case AMDGPU::S_CMP_NLT_F32: {
8094     Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8098 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8112 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
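    // Scalar compares become VALU V_CMP instructions writing a wave-mask
    // condition register; users of the old SCC def are rewritten to read it.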
8116 case AMDGPU::S_CMP_LT_F16:
8117 case AMDGPU::S_CMP_EQ_F16:
8118 case AMDGPU::S_CMP_LE_F16:
8119 case AMDGPU::S_CMP_GT_F16:
8120 case AMDGPU::S_CMP_LG_F16:
8121 case AMDGPU::S_CMP_GE_F16:
8122 case AMDGPU::S_CMP_O_F16:
8123 case AMDGPU::S_CMP_U_F16:
8124 case AMDGPU::S_CMP_NGE_F16:
8125 case AMDGPU::S_CMP_NLG_F16:
8126 case AMDGPU::S_CMP_NGT_F16:
8127 case AMDGPU::S_CMP_NLE_F16:
8128 case AMDGPU::S_CMP_NEQ_F16:
8129 case AMDGPU::S_CMP_NLT_F16: {
8130     Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
8152 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8156 case AMDGPU::S_CVT_HI_F32_F16: {
8157     Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8158     Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8159     if (ST.useRealTrue16Insts()) {
8164           .addReg(TmpReg, {}, AMDGPU::hi16)
8180 addUsersToMoveToVALUWorklist(NewDst,
MRI, Worklist);
8184 case AMDGPU::S_MINIMUM_F32:
8185 case AMDGPU::S_MAXIMUM_F32: {
8186     Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8197     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8201 case AMDGPU::S_MINIMUM_F16:
8202 case AMDGPU::S_MAXIMUM_F16: {
8203     Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8204                                                     ? &AMDGPU::VGPR_16RegClass
8205                                                     : &AMDGPU::VGPR_32RegClass);
8217     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8221 case AMDGPU::V_S_EXP_F16_e64:
8222 case AMDGPU::V_S_LOG_F16_e64:
8223 case AMDGPU::V_S_RCP_F16_e64:
8224 case AMDGPU::V_S_RSQ_F16_e64:
8225 case AMDGPU::V_S_SQRT_F16_e64: {
8226     Register NewDst = MRI.createVirtualRegister(ST.useRealTrue16Insts()
8227                                                     ? &AMDGPU::VGPR_16RegClass
8228                                                     : &AMDGPU::VGPR_32RegClass);
8240     addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8246 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8254 if (NewOpcode == Opcode) {
8262     Register NewDst = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8264             get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8278           RI.getCommonSubClass(NewDstRC, SrcRC)) {
8285     addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8286     MRI.replaceRegWith(DstReg, NewDstReg);
8287     MRI.clearKillFlags(NewDstReg);
8290     if (!MRI.constrainRegClass(NewDstReg, CommonRC))
8307   if (ST.useRealTrue16Insts() && Inst.isCopy() &&
8311     if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8312       Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8317               get(AMDGPU::REG_SEQUENCE), NewDstReg)
8323       MRI.replaceRegWith(DstReg, NewDstReg);
8324       addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8326     } else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8329       Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8330       MRI.replaceRegWith(DstReg, NewDstReg);
8331       addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8336     Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
8337     MRI.replaceRegWith(DstReg, NewDstReg);
8339     addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8349     if (AMDGPU::getNamedOperandIdx(NewOpcode,
8350                                    AMDGPU::OpName::src0_modifiers) >= 0)
8354     NewInstr->addOperand(Src);
8357 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8360 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8362       NewInstr.addImm(Size);
8363     } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8367     } else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8372 "Scalar BFE is only implemented for constant width and offset");
8380 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8381 AMDGPU::OpName::src1_modifiers) >= 0)
8383 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8385 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8386 AMDGPU::OpName::src2_modifiers) >= 0)
8388 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8390 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8392 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8394 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8400        NewInstr->addOperand(Op);
8407    if (Op.getReg() == AMDGPU::SCC) {
8409      if (Op.isDef() && !Op.isDead())
8410        addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
8412 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8417 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8418 Register DstReg = NewInstr->getOperand(0).getReg();
8423      NewDstReg = MRI.createVirtualRegister(NewDstRC);
8424      MRI.replaceRegWith(DstReg, NewDstReg);
8433    addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
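// moveScalarAddSub: on subtargets with carry-less VALU adds, S_ADD_I32 /
// S_SUB_I32 can be rewritten directly to V_ADD_U32_e64 / V_SUB_U32_e64; the
// returned pair reports whether the rewrite happened and the resulting block.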
8437std::pair<bool, MachineBasicBlock *>
8440 if (ST.hasAddNoCarryInsts()) {
8449    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8452    assert(Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32);
8454    unsigned NewOpc = Opc == AMDGPU::S_ADD_I32 ?
8455      AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8463    MRI.replaceRegWith(OldDstReg, ResultReg);
8466    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8467    return std::pair(true, NewBB);
8470  return std::pair(false, nullptr);
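// lowerSelect: S_CSELECT is lowered by materializing the SCC condition in a
// wave-mask register (reusing an existing copy from SCC when one is found)
// and selecting with V_CNDMASK_B32_e64 into a VGPR-class destination.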
8487 bool IsSCC = (CondReg == AMDGPU::SCC);
8495      MRI.replaceRegWith(Dest.getReg(), CondReg);
8501      const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8502      NewCondReg = MRI.createVirtualRegister(TC);
8506      bool CopyFound = false;
8507      for (MachineInstr &CandI :
8510        if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) !=
8512          if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8514                .addReg(CandI.getOperand(1).getReg());
8526            ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8534      RI.getEquivalentVGPRClass(MRI.getRegClass(Dest.getReg())));
8535  MachineInstr *NewInst;
8536  if (Inst.getOpcode() == AMDGPU::S_CSELECT_B32) {
8537    NewInst = BuildMI(MBB, MII, DL, get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8550  MRI.replaceRegWith(Dest.getReg(), NewDestReg);
8552  addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8564  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8565  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8567  unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8568                                           : AMDGPU::V_SUB_CO_U32_e32;
8578  MRI.replaceRegWith(Dest.getReg(), ResultReg);
8579  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8592  Register SubResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8593  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8594  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8596  unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8597                                           : AMDGPU::V_SUB_CO_U32_e32;
8609  MRI.replaceRegWith(Dest.getReg(), ResultReg);
8610  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
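// lowerScalarXnor: with DL instructions the xnor maps directly onto a VALU
// xnor into a fresh VGPR; otherwise the operation is rebuilt from scalar
// not/xor pieces, keeping whichever operand is already an SGPR on the scalar
// side (presumably to limit constant-bus pressure).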
8624 if (ST.hasDLInsts()) {
8625    Register NewDest = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8633    MRI.replaceRegWith(Dest.getReg(), NewDest);
8634    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8640    bool Src0IsSGPR = Src0.isReg() &&
8641                      RI.isSGPRClass(MRI.getRegClass(Src0.getReg()));
8642    bool Src1IsSGPR = Src1.isReg() &&
8643                      RI.isSGPRClass(MRI.getRegClass(Src1.getReg()));
8645    Register Temp = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8646    Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8656    } else if (Src1IsSGPR) {
8670    MRI.replaceRegWith(Dest.getReg(), NewDest);
8674    addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8680                                          unsigned Opcode) const {
8690  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8691  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
8703  MRI.replaceRegWith(Dest.getReg(), NewDest);
8704  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8709                                        unsigned Opcode) const {
8719  Register NewDest = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8720  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
8732  MRI.replaceRegWith(Dest.getReg(), NewDest);
8733  addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8748  const MCInstrDesc &InstDesc = get(Opcode);
8749  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8751                                      &AMDGPU::SGPR_32RegClass;
8753  const TargetRegisterClass *Src0SubRC =
8754      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8757                                                       AMDGPU::sub0, Src0SubRC);
8759  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
8760  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8761  const TargetRegisterClass *NewDestSubRC =
8762      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8764  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
8765  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
8768                                                       AMDGPU::sub1, Src0SubRC);
8770  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
8771  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1).add(SrcReg0Sub1);
8776  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
8783  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8785  Worklist.insert(&LoHalf);
8786  Worklist.insert(&HiHalf);
8792  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
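// splitScalarSMulU64: a 64-bit scalar multiply is expanded from 32-bit
// halves: lo*lo produces the low result and a carry, the cross products
// (Op1L*Op0H and Op1H*Op0L) plus that carry are accumulated into the high
// half, and the two halves are recombined into a 64-bit VGPR pair.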
8803  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8804  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8805  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8813  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8814  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8815  const TargetRegisterClass *Src0SubRC =
8816      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8817  if (RI.isSGPRClass(Src0SubRC))
8818    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8819  const TargetRegisterClass *Src1SubRC =
8820      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8821  if (RI.isSGPRClass(Src1SubRC))
8822    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8826  MachineOperand Op0L =
8828  MachineOperand Op1L =
8830  MachineOperand Op0H =
8832  MachineOperand Op1H =
8850  Register Op1L_Op0H_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8851  MachineInstr *Op1L_Op0H =
8856  Register Op1H_Op0L_Reg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8857  MachineInstr *Op1H_Op0L =
8862  Register CarryReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8863  MachineInstr *Carry =
8868  MachineInstr *LoHalf =
8873  Register AddReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8878  MachineInstr *HiHalf =
8889  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8901  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
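// splitScalarSMulPseudo: for S_MUL_U64_U32_PSEUDO / S_MUL_I64_I32_PSEUDO the
// operands are already 32-bit (zero- or sign-extended), so the expansion is
// just a mul-lo for sub0 and V_MUL_HI_U32 / V_MUL_HI_I32 for sub1.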
8912  Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
8913  Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8914  Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
8922  const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
8923  const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
8924  const TargetRegisterClass *Src0SubRC =
8925      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8926  if (RI.isSGPRClass(Src0SubRC))
8927    Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8928  const TargetRegisterClass *Src1SubRC =
8929      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8930  if (RI.isSGPRClass(Src1SubRC))
8931    Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8935  MachineOperand Op0L =
8937  MachineOperand Op1L =
8941  unsigned NewOpc = Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8942                        ? AMDGPU::V_MUL_HI_U32_e64
8943                        : AMDGPU::V_MUL_HI_I32_e64;
8944  MachineInstr *HiHalf =
8947  MachineInstr *LoHalf =
8958  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
8966  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8982  const MCInstrDesc &InstDesc = get(Opcode);
8983  const TargetRegisterClass *Src0RC = Src0.isReg() ?
8985                                      &AMDGPU::SGPR_32RegClass;
8987  const TargetRegisterClass *Src0SubRC =
8988      RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8989  const TargetRegisterClass *Src1RC = Src1.isReg() ?
8991                                      &AMDGPU::SGPR_32RegClass;
8993  const TargetRegisterClass *Src1SubRC =
8994      RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8997                                                       AMDGPU::sub0, Src0SubRC);
8999                                                       AMDGPU::sub0, Src1SubRC);
9001                                                       AMDGPU::sub1, Src0SubRC);
9003                                                       AMDGPU::sub1, Src1SubRC);
9005  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9006  const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9007  const TargetRegisterClass *NewDestSubRC =
9008      RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9010  Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
9011  MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
9015  Register DestSub1 = MRI.createVirtualRegister(NewDestSubRC);
9016  MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1)
9020  Register FullDestReg = MRI.createVirtualRegister(NewDestRC);
9027  MRI.replaceRegWith(Dest.getReg(), FullDestReg);
9029  Worklist.insert(&LoHalf);
9030  Worklist.insert(&HiHalf);
9033  addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
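// splitScalar64BitXnor folds the NOT into a 64-bit scalar intermediate and
// lets the 64-bit XOR be split as a normal binary op; splitScalar64BitBCNT
// chains two V_BCNT_U32_B32_e64 ops, feeding the count of sub0 into the
// count of sub1.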
9049  const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
9051  Register Interm = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
9053  MachineOperand* Op0;
9054  MachineOperand* Op1;
9067  Register NewDest = MRI.createVirtualRegister(DestRC);
9073  MRI.replaceRegWith(Dest.getReg(), NewDest);
9089  const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64);
9090  const TargetRegisterClass *SrcRC = Src.isReg() ?
9091    MRI.getRegClass(Src.getReg()) :
9092    &AMDGPU::SGPR_32RegClass;
9094  Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9095  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9097  const TargetRegisterClass *SrcSubRC =
9098      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9101                                                    AMDGPU::sub0, SrcSubRC);
9103                                                    AMDGPU::sub1, SrcSubRC);
9109  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9113  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9132         Offset == 0 && "Not implemented");
9135    Register MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9136    Register MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9137    Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9154    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9155    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9160  Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9161  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
9165      .addReg(Src.getReg(), {}, AMDGPU::sub0);
9168      .addReg(Src.getReg(), {}, AMDGPU::sub0)
9173  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9174  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9193  const MCInstrDesc &InstDesc = get(Opcode);
9195  bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9196  unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9197                                               : AMDGPU::V_ADD_CO_U32_e32;
9199  const TargetRegisterClass *SrcRC =
9200      Src.isReg() ? MRI.getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9201  const TargetRegisterClass *SrcSubRC =
9202      RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9204  MachineOperand SrcRegSub0 =
9206  MachineOperand SrcRegSub1 =
9209  Register MidReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9210  Register MidReg2 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9211  Register MidReg3 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9212  Register MidReg4 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9219      .addReg(IsCtlz ? MidReg1 : MidReg2)
9225      .addReg(IsCtlz ? MidReg2 : MidReg1);
9227  MRI.replaceRegWith(Dest.getReg(), MidReg4);
9229  addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
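// addUsersToMoveToVALUWorklist: every user of the freshly VALU-ified result
// is queued; wrappers such as REG_SEQUENCE, INSERT_SUBREG and the WWM/WQM
// pseudos only need their destination register class re-constrained.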
9232 void SIInstrInfo::addUsersToMoveToVALUWorklist(
9236    MachineInstr &UseMI = *MO.getParent();
9240    switch (UseMI.getOpcode()) {
9243    case AMDGPU::SOFT_WQM:
9244    case AMDGPU::STRICT_WWM:
9245    case AMDGPU::STRICT_WQM:
9246    case AMDGPU::REG_SEQUENCE:
9248    case AMDGPU::INSERT_SUBREG:
9251      OpNo = MO.getOperandNo();
9256        MRI.constrainRegClass(DstReg, OpRC);
9258        if (!RI.hasVectorRegisters(OpRC))
9269  Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9275  if (ST.useRealTrue16Insts()) {
9278      SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9285      SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9291    bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
9292    bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
9294    auto NewMI = BuildMI(*MBB, Inst, DL, get(AMDGPU::REG_SEQUENCE), ResultReg);
9296 case AMDGPU::S_PACK_LL_B32_B16:
9298 .addReg(SrcReg0, {},
9299 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9300 .addImm(AMDGPU::lo16)
9301 .addReg(SrcReg1, {},
9302 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9303 .addImm(AMDGPU::hi16);
9305 case AMDGPU::S_PACK_LH_B32_B16:
9307 .addReg(SrcReg0, {},
9308 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9309 .addImm(AMDGPU::lo16)
9310 .addReg(SrcReg1, {}, AMDGPU::hi16)
9311 .addImm(AMDGPU::hi16);
9313 case AMDGPU::S_PACK_HL_B32_B16:
9314 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9315 .addImm(AMDGPU::lo16)
9316 .addReg(SrcReg1, {},
9317 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9318 .addImm(AMDGPU::hi16);
9320 case AMDGPU::S_PACK_HH_B32_B16:
9321 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9322 .addImm(AMDGPU::lo16)
9323 .addReg(SrcReg1, {}, AMDGPU::hi16)
9324 .addImm(AMDGPU::hi16);
9331    MRI.replaceRegWith(Dest.getReg(), ResultReg);
9332    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
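  // Without real true16 instructions, the S_PACK_* forms are instead expanded
  // through 32-bit VGPR temporaries (ImmReg / TmpReg) rather than a 16-bit
  // REG_SEQUENCE.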
9337 case AMDGPU::S_PACK_LL_B32_B16: {
9338    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9339    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9356  case AMDGPU::S_PACK_LH_B32_B16: {
9357    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9366  case AMDGPU::S_PACK_HL_B32_B16: {
9367    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9377  case AMDGPU::S_PACK_HH_B32_B16: {
9378    Register ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9379    Register TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
9396  MRI.replaceRegWith(Dest.getReg(), ResultReg);
9397  addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9406  assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
9407         !Op.isDead() && Op.getParent() == &SCCDefInst);
9408  SmallVector<MachineInstr *, 4> CopyToDelete;
9411  for (MachineInstr &MI :
9415    int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false);
9418      MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9419      Register DestReg = MI.getOperand(0).getReg();
9421      MRI.replaceRegWith(DestReg, NewCond);
9426        MI.getOperand(SCCIdx).setReg(NewCond);
9432    if (MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI, false, false) != -1)
9435  for (auto &Copy : CopyToDelete)
9436    Copy->eraseFromParent();
9444 void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
9450  for (MachineInstr &MI :
9453    if (MI.modifiesRegister(AMDGPU::VCC, &RI))
9455    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
9464  const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0);
9472 case AMDGPU::REG_SEQUENCE:
9473 case AMDGPU::INSERT_SUBREG:
9475 case AMDGPU::SOFT_WQM:
9476 case AMDGPU::STRICT_WWM:
9477 case AMDGPU::STRICT_WQM: {
9479 if (RI.isAGPRClass(SrcRC)) {
9480 if (RI.isAGPRClass(NewDstRC))
9485 case AMDGPU::REG_SEQUENCE:
9486 case AMDGPU::INSERT_SUBREG:
9487 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9490 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9496 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9499 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
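// findUsedSGPR: scan the candidate source operands for an SGPR that may stay
// in the VALU form; an SGPR read by more than one operand is preferred since
// it only occupies a single constant-bus slot.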
9513                                     int OpIndices[3]) const {
9514  const MCInstrDesc &Desc = MI.getDesc();
9530  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
9532  for (unsigned i = 0; i < 3; ++i) {
9533    int Idx = OpIndices[i];
9537    const MachineOperand &MO = MI.getOperand(Idx);
9543    const TargetRegisterClass *OpRC =
9544        RI.getRegClass(getOpRegClassID(Desc.operands()[Idx]));
9545    bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9551      const TargetRegisterClass *RegRC = MRI.getRegClass(Reg);
9552      if (RI.isSGPRClass(RegRC))
9570 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9571 SGPRReg = UsedSGPRs[0];
9574 if (!SGPRReg && UsedSGPRs[1]) {
9575 if (UsedSGPRs[1] == UsedSGPRs[2])
9576 SGPRReg = UsedSGPRs[1];
9583                                                AMDGPU::OpName OperandName) const {
9584  if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9587  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
9591  return &MI.getOperand(Idx);
9605 if (ST.isAmdHsaOS()) {
9608 RsrcDataFormat |= (1ULL << 56);
9613 RsrcDataFormat |= (2ULL << 59);
9616 return RsrcDataFormat;
9626  uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize(true)) - 1;
9631  uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9638  Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT;
9644  unsigned Opc = MI.getOpcode();
9650  return get(Opc).mayLoad() &&
9655                                          int &FrameIndex) const {
9657  if (!Addr || !Addr->isFI())
9668                                         int &FrameIndex) const {
9676                                          int &FrameIndex) const {
9690                                           int &FrameIndex) const {
9707  while (++I != E && I->isInsideBundle()) {
9708    assert(!I->isBundle() && "No nested bundle!");
9716  unsigned Opc = MI.getOpcode();
9718  unsigned DescSize = Desc.getSize();
9723  unsigned Size = DescSize;
9727  if (MI.isBranch() && ST.hasOffset3fBug())
9738    bool HasLiteral = false;
9739    unsigned LiteralSize = 4;
9740    for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
9745        if (ST.has64BitLiterals()) {
9746          switch (OpInfo.OperandType) {
9762    return HasLiteral ? DescSize + LiteralSize : DescSize;
9767    int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
9771    int RSrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
9772    return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9776  case TargetOpcode::BUNDLE:
9778  case TargetOpcode::INLINEASM:
9779  case TargetOpcode::INLINEASM_BR: {
9781    const char *AsmStr = MI.getOperand(0).getSymbolName();
9785    if (MI.isMetaInstruction())
9789    const auto *D16Info = AMDGPU::getT16D16Helper(Opc);
9792      unsigned LoInstOpcode = D16Info->LoOp;
9794      DescSize = Desc.getSize();
9798    if (Opc == AMDGPU::V_FMA_MIX_F16_t16 || Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9801      DescSize = Desc.getSize();
9812  if (MI.memoperands_empty())
9824  static const std::pair<int, const char *> TargetIndices[] = {
9862std::pair<unsigned, unsigned>
9869 static const std::pair<unsigned, const char *> TargetFlags[] = {
9887 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9902 return AMDGPU::WWM_COPY;
9904 return AMDGPU::COPY;
9921 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
9925  if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
9926    return IsLRSplitInst;
9939  bool IsNullOrVectorRegister = true;
9943    IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
9946  return IsNullOrVectorRegister &&
9948         (!MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
9949          MI.modifiesRegister(AMDGPU::EXEC, &RI)));
9957 if (ST.hasAddNoCarryInsts())
9961  Register UnusedCarry = MRI.createVirtualRegister(RI.getBoolRC());
9962  MRI.setRegAllocationHint(UnusedCarry, 0, RI.getVCC());
9973  if (ST.hasAddNoCarryInsts())
9977  Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
9979                             : RS.scavengeRegisterBackwards(
9980                                   *RI.getBoolRC(), I, false,
9993 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
9994 case AMDGPU::SI_KILL_I1_TERMINATOR:
10003 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10004 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10005 case AMDGPU::SI_KILL_I1_PSEUDO:
10006 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10018 const unsigned OffsetBits =
10020 return (1 << OffsetBits) - 1;
10024 if (!ST.isWave32())
10027  if (MI.isInlineAsm())
10030  if (MI.getNumOperands() < MI.getNumExplicitOperands())
10033  for (auto &Op : MI.implicit_operands()) {
10034    if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
10035      Op.setReg(AMDGPU::VCC_LO);
10044  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sbase);
10048  const int16_t RCID = getOpRegClassID(MI.getDesc().operands()[Idx]);
10049 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10065 if (Imm > MaxImm) {
10066 if (Imm <= MaxImm + 64) {
10068 Overflow = Imm - MaxImm;
10087 if (Overflow > 0) {
10095 if (ST.hasRestrictedSOffset())
10100 SOffset = Overflow;
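// FLAT/scratch offset splitting: a constant offset is divided into the part
// that fits the instruction's immediate field and a remainder folded into the
// address, with extra care on subtargets that have the negative unaligned
// scratch offset bug.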
10138 if (!ST.hasFlatInstOffsets())
10146 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10158std::pair<int64_t, int64_t>
10161 int64_t RemainderOffset = COffsetVal;
10162 int64_t ImmField = 0;
10167 if (AllowNegative) {
10169    int64_t D = 1LL << NumBits;
10170    RemainderOffset = (COffsetVal / D) * D;
10171    ImmField = COffsetVal - RemainderOffset;
10173    if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10175        (ImmField % 4) != 0) {
10177      RemainderOffset += ImmField % 4;
10178      ImmField -= ImmField % 4;
10180  } else if (COffsetVal >= 0) {
10182 RemainderOffset = COffsetVal - ImmField;
10186 assert(RemainderOffset + ImmField == COffsetVal);
10187 return {ImmField, RemainderOffset};
10191 if (ST.hasNegativeScratchOffsetBug() &&
10199 switch (ST.getGeneration()) {
10227 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10228 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10229 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10230 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10231 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10232 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10233 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10234 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10241#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10242 case OPCODE##_dpp: \
10243 case OPCODE##_e32: \
10244 case OPCODE##_e64: \
10245 case OPCODE##_e64_dpp: \
10246 case OPCODE##_sdwa:
10260 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10261 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10262 case AMDGPU::V_FMA_F16_gfx9_e64:
10263 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10264 case AMDGPU::V_INTERP_P2_F16:
10265 case AMDGPU::V_MAD_F16_e64:
10266 case AMDGPU::V_MAD_U16_e64:
10267 case AMDGPU::V_MAD_I16_e64:
10276 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10290 switch (ST.getGeneration()) {
10303  if (isMAI(Opcode)) {
10311    if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
10318  if (ST.hasGFX90AInsts()) {
10320    if (ST.hasGFX940Insts())
10351  for (unsigned I = 0, E = (MI.getNumOperands() - 1) / 2; I < E; ++I)
10352    if (MI.getOperand(1 + 2 * I + 1).getImm() == SubReg) {
10353      auto &RegOp = MI.getOperand(1 + 2 * I);
10365  switch (MI.getOpcode()) {
10367  case AMDGPU::REG_SEQUENCE:
10371  case AMDGPU::INSERT_SUBREG:
10372    if (RSR.SubReg == (unsigned)MI.getOperand(3).getImm())
10389  if (!P.Reg.isVirtual())
10393  auto *DefInst = MRI.getVRegDef(RSR.Reg);
10394  while (auto *MI = DefInst) {
10396    switch (MI->getOpcode()) {
10398    case AMDGPU::V_MOV_B32_e32: {
10399      auto &Op1 = MI->getOperand(1);
10404        DefInst = MRI.getVRegDef(RSR.Reg);
10412        DefInst = MRI.getVRegDef(RSR.Reg);
10425  assert(MRI.isSSA() && "Must be run on SSA");
10427  auto *TRI = MRI.getTargetRegisterInfo();
10428  auto *DefBB = DefMI.getParent();
10432  if (UseMI.getParent() != DefBB)
10435  const int MaxInstScan = 20;
10439  auto E = UseMI.getIterator();
10440  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
10441    if (I->isDebugInstr())
10444    if (++NumInst > MaxInstScan)
10447    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
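// execMayBeModifiedBeforeAnyUse: same bounded scan, but it must survive until
// the last in-block use of VReg; it gives up on PHIs, uses outside the
// defining block, or once the use/instruction scan limits are exceeded.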
10457  assert(MRI.isSSA() && "Must be run on SSA");
10459  auto *TRI = MRI.getTargetRegisterInfo();
10460  auto *DefBB = DefMI.getParent();
10462  const int MaxUseScan = 10;
10465  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
10466    auto &UseInst = *Use.getParent();
10469    if (UseInst.getParent() != DefBB || UseInst.isPHI())
10472    if (++NumUse > MaxUseScan)
10479  const int MaxInstScan = 20;
10483  for (auto I = std::next(DefMI.getIterator()); ; ++I) {
10486    if (I->isDebugInstr())
10489    if (++NumInst > MaxInstScan)
10502      if (Reg == VReg && --NumUse == 0)
10504    } else if (TRI->regsOverlap(Reg, AMDGPU::EXEC))
10513  auto Cur = MBB.begin();
10514  if (Cur != MBB.end())
10516      if (!Cur->isPHI() && Cur->readsRegister(Dst, nullptr))
10519    } while (Cur != MBB.end() && Cur != LastPHIIt);
10528  if (InsPt != MBB.end() &&
10529      (InsPt->getOpcode() == AMDGPU::SI_IF ||
10530       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10531       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10532      InsPt->definesRegister(Src, nullptr)) {
10536        .addReg(Src, {}, SrcSubReg)
10561  if (isFullCopyInstr(MI)) {
10562    Register DstReg = MI.getOperand(0).getReg();
10563    Register SrcReg = MI.getOperand(1).getReg();
10570      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
10574      MRI.constrainRegClass(VirtReg, &AMDGPU::SReg_64_XEXECRegClass);
10585                                        unsigned *PredCost) const {
10586  if (MI.isBundle()) {
10589    unsigned Lat = 0, Count = 0;
10590    for (++I; I != E && I->isBundledWithPred(); ++I) {
10592      Lat = std::max(Lat, SchedModel.computeInstrLatency(&*I));
10594    return Lat + Count - 1;
10597  return SchedModel.computeInstrLatency(&MI);
10604 return *CallAddrOp;
10611  unsigned Opcode = MI.getOpcode();
10616                     : MI.getOperand(1).getReg();
10617    LLT DstTy = MRI.getType(Dst);
10618    LLT SrcTy = MRI.getType(Src);
10620    unsigned SrcAS = SrcTy.getAddressSpace();
10623        ST.hasGloballyAddressableScratch()
10631  if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10632    return HandleAddrSpaceCast(MI);
10635    auto IID = GI->getIntrinsicID();
10642    case Intrinsic::amdgcn_addrspacecast_nonnull:
10643      return HandleAddrSpaceCast(MI);
10644    case Intrinsic::amdgcn_if:
10645    case Intrinsic::amdgcn_else:
10659  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10660      Opcode == AMDGPU::G_SEXTLOAD) {
10661    if (MI.memoperands_empty())
10665 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10666 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10674 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10675 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10676 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10689  unsigned opcode = MI.getOpcode();
10690  if (opcode == AMDGPU::V_READLANE_B32 ||
10691      opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10692      opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10695  if (isCopyInstr(MI)) {
10699        RI.getPhysRegBaseClass(srcOp.getReg());
10707  if (MI.isPreISelOpcode())
10722  if (MI.memoperands_empty())
10726    return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10727           mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10742  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
10744    if (!SrcOp.isReg())
10748    if (!Reg || !SrcOp.readsReg())
10754    if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
10781        F, "ds_ordered_count unsupported for this calling conv"));
10795 Register &SrcReg2, int64_t &CmpMask,
10796                                int64_t &CmpValue) const {
10797  if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
10800  switch (MI.getOpcode()) {
10803 case AMDGPU::S_CMP_EQ_U32:
10804 case AMDGPU::S_CMP_EQ_I32:
10805 case AMDGPU::S_CMP_LG_U32:
10806 case AMDGPU::S_CMP_LG_I32:
10807 case AMDGPU::S_CMP_LT_U32:
10808 case AMDGPU::S_CMP_LT_I32:
10809 case AMDGPU::S_CMP_GT_U32:
10810 case AMDGPU::S_CMP_GT_I32:
10811 case AMDGPU::S_CMP_LE_U32:
10812 case AMDGPU::S_CMP_LE_I32:
10813 case AMDGPU::S_CMP_GE_U32:
10814 case AMDGPU::S_CMP_GE_I32:
10815 case AMDGPU::S_CMP_EQ_U64:
10816 case AMDGPU::S_CMP_LG_U64:
10817    SrcReg = MI.getOperand(0).getReg();
10818    if (MI.getOperand(1).isReg()) {
10819      if (MI.getOperand(1).getSubReg())
10821      SrcReg2 = MI.getOperand(1).getReg();
10823    } else if (MI.getOperand(1).isImm()) {
10825      CmpValue = MI.getOperand(1).getImm();
10831 case AMDGPU::S_CMPK_EQ_U32:
10832 case AMDGPU::S_CMPK_EQ_I32:
10833 case AMDGPU::S_CMPK_LG_U32:
10834 case AMDGPU::S_CMPK_LG_I32:
10835 case AMDGPU::S_CMPK_LT_U32:
10836 case AMDGPU::S_CMPK_LT_I32:
10837 case AMDGPU::S_CMPK_GT_U32:
10838 case AMDGPU::S_CMPK_GT_I32:
10839 case AMDGPU::S_CMPK_LE_U32:
10840 case AMDGPU::S_CMPK_LE_I32:
10841 case AMDGPU::S_CMPK_GE_U32:
10842 case AMDGPU::S_CMPK_GE_I32:
10843    SrcReg = MI.getOperand(0).getReg();
10845    CmpValue = MI.getOperand(1).getImm();
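// The helpers below try to eliminate an S_CMP against 0/1 by reusing the SCC
// value produced by the compared instruction, inverting SCC consumers
// (S_CSELECT / S_CBRANCH_SCC*) when the sense of the test has to flip.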
10855 if (S->isLiveIn(AMDGPU::SCC))
10864 bool SIInstrInfo::invertSCCUse(MachineInstr *SCCDef) const {
10867  bool SCCIsDead = false;
10870  constexpr unsigned ScanLimit = 12;
10871  unsigned Count = 0;
10872  for (MachineInstr &MI :
10874    if (++Count > ScanLimit)
10876    if (MI.readsRegister(AMDGPU::SCC, &RI)) {
10877      if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10878          MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10879          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10880          MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
10885    if (MI.definesRegister(AMDGPU::SCC, &RI)) {
10898  for (MachineInstr *MI : InvertInstr) {
10899    if (MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10900        MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
10902    } else if (MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10903               MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
10904      MI->setDesc(get(MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
10905                          ? AMDGPU::S_CBRANCH_SCC1
10906                          : AMDGPU::S_CBRANCH_SCC0));
10919                              bool NeedInversion) const {
10920  MachineInstr *KillsSCC = nullptr;
10925    if (MI.modifiesRegister(AMDGPU::SCC, &RI))
10927    if (MI.killsRegister(AMDGPU::SCC, &RI))
10930  if (NeedInversion && !invertSCCUse(SCCRedefine))
10932  if (MachineOperand *SccDef =
10934    SccDef->setIsDead(false);
10942 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
10943 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
10945 bool Op1IsNonZeroImm =
10946 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
10947 bool Op2IsZeroImm =
10948 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
10949 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
10955 unsigned &NewDefOpc) {
10958 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
10959 Def.getOpcode() != AMDGPU::S_ADD_U32)
10965  if ((!AddSrc1.isImm() || AddSrc1.getImm() != 1) &&
10971  if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
10973        Def.findRegisterDefOperand(AMDGPU::SCC, nullptr);
10976 NewDefOpc = AMDGPU::S_ADD_U32;
10978 NeedInversion = !NeedInversion;
10983 Register SrcReg2, int64_t CmpMask,
10992  const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
10993                                  this](bool NeedInversion) -> bool {
11017 unsigned NewDefOpc = Def->getOpcode();
11023 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11026 if (NewDefOpc != Def->getOpcode())
11027      Def->setDesc(get(NewDefOpc));
11036 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11037 MRI->use_nodbg_empty(Def->getOperand(0).getReg())) {
11043    if (Def1 && Def1->getOpcode() == AMDGPU::COPY && Def2 &&
11051      optimizeSCC(Select, Def, false);
11058  const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11059                               this](int64_t ExpectedValue, unsigned SrcSize,
11060                                     bool IsReversible, bool IsSigned) -> bool {
11088 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11089 Def->getOpcode() != AMDGPU::S_AND_B64)
11093    const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
11104 SrcOp = &Def->getOperand(2);
11105 else if (isMask(&Def->getOperand(2)))
11106 SrcOp = &Def->getOperand(1);
11114 if (IsSigned && BitNo == SrcSize - 1)
11117 ExpectedValue <<= BitNo;
11119    bool IsReversedCC = false;
11120 if (CmpValue != ExpectedValue) {
11123 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11128 Register DefReg = Def->getOperand(0).getReg();
11129    if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
11132    if (!optimizeSCC(Def, &CmpInstr, false))
11135    if (!MRI->use_nodbg_empty(DefReg)) {
11143 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11144 : AMDGPU::S_BITCMP1_B32
11145 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11146 : AMDGPU::S_BITCMP1_B64;
11151 Def->eraseFromParent();
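// Dispatch on the compare opcode: tests against 1 or 0 are folded either into
// S_BITCMP0/S_BITCMP1 on the AND mask (optimizeCmpAnd) or into reuse of a
// preceding S_CSELECT's SCC def (optimizeCmpSelect).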
11159 case AMDGPU::S_CMP_EQ_U32:
11160 case AMDGPU::S_CMP_EQ_I32:
11161 case AMDGPU::S_CMPK_EQ_U32:
11162 case AMDGPU::S_CMPK_EQ_I32:
11163    return optimizeCmpAnd(1, 32, true, false) ||
11164           optimizeCmpSelect(true);
11165  case AMDGPU::S_CMP_GE_U32:
11166  case AMDGPU::S_CMPK_GE_U32:
11167    return optimizeCmpAnd(1, 32, false, false);
11168  case AMDGPU::S_CMP_GE_I32:
11169  case AMDGPU::S_CMPK_GE_I32:
11170    return optimizeCmpAnd(1, 32, false, true);
11171  case AMDGPU::S_CMP_EQ_U64:
11172    return optimizeCmpAnd(1, 64, true, false);
11173  case AMDGPU::S_CMP_LG_U32:
11174  case AMDGPU::S_CMP_LG_I32:
11175  case AMDGPU::S_CMPK_LG_U32:
11176  case AMDGPU::S_CMPK_LG_I32:
11177    return optimizeCmpAnd(0, 32, true, false) ||
11178           optimizeCmpSelect(false);
11179  case AMDGPU::S_CMP_GT_U32:
11180  case AMDGPU::S_CMPK_GT_U32:
11181    return optimizeCmpAnd(0, 32, false, false);
11182  case AMDGPU::S_CMP_GT_I32:
11183  case AMDGPU::S_CMPK_GT_I32:
11184    return optimizeCmpAnd(0, 32, false, true);
11185  case AMDGPU::S_CMP_LG_U64:
11186    return optimizeCmpAnd(0, 64, true, false) ||
11187           optimizeCmpSelect(false);
11194                                          AMDGPU::OpName OpName) const {
11195  if (!ST.needsAlignedVGPRs())
11198  int OpNo = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
11210  bool IsAGPR = RI.isAGPR(MRI, DataReg);
11212      IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11215      MRI.createVirtualRegister(IsAGPR ? &AMDGPU::AReg_64_Align2RegClass
11216                                       : &AMDGPU::VReg_64_Align2RegClass);
11218      .addReg(DataReg, {}, Op.getSubReg())
11223  Op.setSubReg(AMDGPU::sub0);
11238  if (ST.hasGFX1250Insts())
11245  unsigned Opcode = MI.getOpcode();
11251      Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11252      Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11255  if (!ST.hasGFX940Insts())
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static MachineBasicBlock * loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given instruction opcode.
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
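As a rough illustration of the entry above (not taken from the file itself; TII, MBB, I, DL and the three registers are assumed to already exist in the caller), the returned builder holds the destination and the caller appends the source operands:
// Sketch: DestReg = Src0 + Src1, letting the helper pick the carry-less or
// carry-clobbering add form appropriate for the subtarget.
TII->getAddNoCarry(MBB, I, DL, DestReg)
    .addReg(Src0)
    .addReg(Src1);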
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
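A hedged sketch of consuming the returned pair; the address-space and flat-variant constants shown (AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal) are assumptions about the usual enum names, and COffsetVal and TII are assumed to exist in the caller:
// Keep the part of the offset that fits the FLAT immediate field; the
// remainder must be folded back into the address computation.
auto [ImmField, RemainderOffset] =
    TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                         SIInstrFlags::FlatGlobal);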
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of an s_trap 2 instruction for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies a value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
bool isAlwaysGDS(uint16_t Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst for 16-bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns whether Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
static bool isWWMRegSpillOpcode(uint16_t Opcode)
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminates with a divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
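For illustration only (assuming the usual convention that a return value of -1 means the pseudo has no MC counterpart; TII and MI are assumed to be in scope):
// A pseudo that cannot be mapped to a real encoding must have been expanded
// or rejected before instruction emission.
if (TII->pseudoToMCOpcode(MI.getOpcode()) == -1)
  llvm_unreachable("pseudo instruction has no MC opcode");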
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
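A minimal sketch of the named-operand lookup above (AMDGPU::OpName::offset is just an example operand name; MI, TII and Offset are assumed to exist in the caller):
// Prefer name-based lookup over hard-coded operand indices; a null result
// means the instruction has no operand with that name.
if (const MachineOperand *Off =
        TII->getNamedOperand(MI, AMDGPU::OpName::offset))
  Offset = Off->getImm();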
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
LLVM_READONLY int getBasicFromSDWAOp(uint16_t Opcode)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
LLVM_READONLY int getVOPe32(uint16_t Opcode)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
LLVM_READONLY int getGlobalVaddrOp(uint16_t Opcode)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
LLVM_READONLY int getAddr64Inst(uint16_t Opcode)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY int getMFMAEarlyClobberOp(uint16_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
LLVM_READONLY int getCommuteRev(uint16_t Opcode)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int getCommuteOrig(uint16_t Opcode)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
int getMCOpcode(uint16_t Opcode, unsigned Gen)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
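A hedged example of the typical query (the subtarget hook spelled here as ST.hasInv2PiInlineImm() is an assumption; Imm and ST are assumed to exist):
// Inline constants avoid spending a literal slot on the 64-bit immediate.
bool CanInline = AMDGPU::isInlinableLiteral64(Imm, ST.hasInv2PiInlineImm());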
LLVM_READONLY int getIfAddr64Inst(uint16_t Opcode)
Check if Opcode is an Addr64 opcode.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for an N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
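A minimal sketch of the builder pattern named above (the opcode and registers are placeholders; MBB, I, DL, TII and DestReg are assumed to be in scope):
// Insert "DestReg = V_MOV_B32_e32 0" before iterator I in MBB.
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), DestReg)
    .addImm(0);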
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
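Since the helper is constexpr, a compile-time check illustrates the range test (assuming llvm/Support/MathExtras.h and the llvm namespace):
// Signed 16-bit immediate field: 32767 fits, 32768 does not.
static_assert(isInt<16>(32767) && !isInt<16>(32768), "16-bit signed range");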
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
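A common usage sketch (MBB is assumed to be a MachineBasicBlock in scope): the early-increment adaptor lets the loop body erase the current instruction without invalidating the iteration.
// Strip debug instructions from a block while walking it.
for (MachineInstr &MI : make_early_inc_range(MBB))
  if (MI.isDebugInstr())
    MI.eraseFromParent();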
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
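With the default Skew of 0 this rounds down to a multiple of the alignment; since the helper is constexpr, a compile-time example:
// 37 rounded down to an 8-byte boundary is 32.
static_assert(alignDown(37u, 8u) == 32u, "round down to multiple of 8");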
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
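Hi_32 and Lo_32 together split a 64-bit value into the halves used, for example, when materializing a 64-bit immediate with two 32-bit moves; a compile-time sketch:
constexpr uint64_t Imm64 = 0x1122334455667788ULL;
static_assert(Hi_32(Imm64) == 0x11223344u && Lo_32(Imm64) == 0x55667788u,
              "upper and lower halves");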
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
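Taken together with SignExtend64 just above, these two helpers build and sign-interpret a bitfield; both are constexpr, so a compile-time sketch:
static_assert(maskTrailingOnes<uint32_t>(8) == 0xffu, "low 8 bits set");
static_assert(SignExtend64<13>(0x1fff) == -1, "13-bit all-ones field is -1");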
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr RegState getUndefRegState(bool B)
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.