#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
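// The .inc files above are TableGen-generated; the GET_* macros select which
// generated definitions (instruction info ctor/dtor, image-dim intrinsic
// tables, rsrc intrinsic tables) are instantiated in this translation unit.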
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),

  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
    assert(MI.getDesc().getNumOperands() == 2);
    assert(MI.getDesc().getNumImplicitDefs() == 0);
    assert(MI.getDesc().getNumImplicitUses() == 1);
    return MI.getNumOperands() == 3;
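// A plain move has two MCInstrDesc operands (dst, src) plus a single implicit
// use, so requiring exactly three MachineInstr operands here presumably
// filters out moves that have picked up extra implicit operands.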
                                    int64_t &Offset1) const {

    if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();

        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
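// Both paths recover the constant offset operand of each load so the caller
// can compare them; getNamedOperandIdx counts MachineInstr operands (which
// include defs), so NumDefs is subtracted, presumably to get the matching
// SDNode operand index.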
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,

  OffsetIsScalable = false;

      BaseOps.push_back(BaseOp);

      unsigned Offset0 = Offset0Op->getImm();
      unsigned Offset1 = Offset1Op->getImm();
      if (Offset0 + 1 != Offset1)

      BaseOps.push_back(BaseOp);
      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

    if (SOffset && SOffset->isReg()) {

    if (AddrReg && !AddrReg->isFI())

    BaseOps.push_back(RSrc);
    BaseOps.push_back(SOffset);

    BaseOps.push_back(BaseOp);

    BaseOps.push_back(BaseOp);

    BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    BaseOps.push_back(BaseOp);

    BaseOps.push_back(BaseOp);
    BaseOps.push_back(BaseOp);

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / NumLoads;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
  return NumDWORDs <= 8;
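// Clustering heuristic: each load is rounded up to a whole number of dwords,
// and the cluster is only allowed while the estimated combined size stays
// within 8 dwords.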
    int64_t Offset0, int64_t Offset1, unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal SGPR to VGPR copy") {

         AMDGPU::AGPR_32RegClass.contains(SrcReg));

    if (!Def->definesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)

      bool SafeToPropagate = true;
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {

  unsigned RegNo = DestReg % 3;

  if (!TII.getSubtarget().hasGFX90AInsts()) {

    while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];

    unsigned Opcode = AMDGPU::S_MOV_B32;

    Register Src = RI.getSubReg(SrcReg, SubIdx);
    bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      Opcode = AMDGPU::S_MOV_B64;

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
                 .addReg(RI.getSubReg(SrcReg, SubIdx))

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
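// The SGPR copy is expanded one subregister at a time, widening to S_MOV_B64
// when both the source and destination subregisters are even-aligned and
// another index remains; the kill flag for the source super-register is
// attached to the final move.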
      ((RI.getRegSizeInBits(*RC) == 16) ^

    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;

    if (DestReg == SrcReg) {

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg)) {

  const unsigned Size = RI.getRegSizeInBits(*RC);
           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg with an AGPR!");

      if (!DstLow || !SrcLow) {
               "Cannot use hi16 subreg on VI!");

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
          AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         RI.getSubReg(SrcReg, SubIdx), UseKill, *RS,
                         ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
      Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
      Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);

          .addReg(RI.getSubReg(SrcReg, SubIdx));
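// Wide vector copies are likewise split per subregister: the
// INSTRUCTION_LIST_END sentinel diverts cases with no single-instruction move
// to the indirect AGPR copy helper, and V_PK_MOV_B32 (where selected) appears
// to move a 64-bit subregister pair per step.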
                                       int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;

      Opcode = AMDGPU::S_MOV_B32;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;

        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));

  return &AMDGPU::VGPR_32RegClass;

         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
  } else if (Cond.size() == 2) {
    assert(Cond[0].isImm() && "Cond[0] is not an immediate");
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
    case SIInstrInfo::VCCZ: {
    case SIInstrInfo::EXECNZ: {
                           : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                           : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
                           : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                           : AMDGPU::S_CSELECT_B64), SReg)

  return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;

  return AMDGPU::COPY;
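// Move-opcode selection by destination class: 32-bit values use S_MOV_B32 or
// V_MOV_B32_e32 depending on whether the class is scalar, 64-bit SGPRs use
// S_MOV_B64, 64-bit VGPRs use V_MOV_B64_PSEUDO, and everything else falls
// back to COPY.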
                                 bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
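// Each selector above picks an indirect-indexing pseudo sized for the vector
// being accessed (V1 through V32, or the 64-bit variants); the callers
// presumably expand the chosen pseudo later, once the index operand is known.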
                                bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;
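// Spill-save opcodes are keyed purely on the spill size, with separate
// SI_SPILL_S*/V*/A* families for SGPR, VGPR, and AGPR spills.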
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

  if (SrcReg.isVirtual() && SpillSize == 4) {

    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

  if (DestReg.isVirtual() && SpillSize == 4) {

                              unsigned Quantity) const {
  while (Quantity > 0) {

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  switch (MI.getOpcode()) {
    return MI.getOperand(0).getImm() + 1;

  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    if (SrcOp.isImm()) {

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                   MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;

      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

            .add(MI.getOperand(2))

    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
            .add(MI.getOperand(1)));
      MIB.add(MI.getOperand(2));
    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
                     : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    MovDPP.addDef(RI.getSubReg(Dst, Sub));

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        MovDPP.addReg(RI.getSubReg(Src, Sub));

    for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
      MovDPP.addImm(MI.getOperand(I).getImm());

    Split[Part] = MovDPP;

  MI.eraseFromParent();
  return std::make_pair(Split[0], Split[1]);
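// The V_MOV_B64_DPP_PSEUDO expansion above builds two 32-bit DPP moves, one
// per sub0/sub1 half, and copies the remaining explicit immediate operands
// (the DPP control fields) onto each half before deleting the pseudo.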
                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                      Src1, AMDGPU::OpName::src1_modifiers);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx1) const {

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {

  return MI.getOperand(0).getMBB();

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");

  if (BrOffset >= 0) {
        .addReg(PCReg, 0, AMDGPU::sub0)
        .addReg(PCReg, 0, AMDGPU::sub1)
        .addReg(PCReg, 0, AMDGPU::sub0)
        .addReg(PCReg, 0, AMDGPU::sub1)

      AMDGPU::SReg_64RegClass,

  return 4 + 8 + 4 + 4;

unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;
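// getBranchOpcode and getBranchPredicate (below) are inverse mappings between
// the BranchPredicate enum and the corresponding S_CBRANCH_* opcodes.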
SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;
    I->eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {
  if (Cond[0].isImm()) {

                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {
  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Regs.push_back(DstElt);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);

  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO: {

    unsigned NumOps = MI.getDesc().getNumOperands() +
                      MI.getDesc().getNumImplicitUses();

    return MI.getNumOperands() == NumOps;

  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

                                  unsigned Kind) const {

  unsigned Opc = MI.getOpcode();
                                            AMDGPU::OpName::src0_modifiers);
                                            AMDGPU::OpName::src1_modifiers);
                                            AMDGPU::OpName::src2_modifiers);
  MI.RemoveOperand(Src2ModIdx);
  MI.RemoveOperand(Src1ModIdx);
  MI.RemoveOperand(Src0ModIdx);
  switch (DefMI.getOpcode()) {
  case AMDGPU::S_MOV_B64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {

    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

    UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
    bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64;

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      const int64_t Imm = ImmOp->getImm();

      UseMI.RemoveOperand(
      UseMI.RemoveOperand(

      unsigned Src1SubReg = Src1->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

        DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            commuteInstruction(UseMI)) {

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      const int64_t Imm = ImmOp->getImm();

      UseMI.RemoveOperand(
      UseMI.RemoveOperand(

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
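// Standard interval test: the two accesses are disjoint when the lower-offset
// access ends at or before the higher-offset access begins.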
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  unsigned Dummy0, Dummy1;
  bool Offset0IsScalable, Offset1IsScalable;

  unsigned Width0 = MIa.memoperands().front()->getSize();
  unsigned Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
      Def->getOperand(1).isImm())
    return Def->getOperand(1).getImm();
  return AMDGPU::NoRegister;

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                           AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
          IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
                : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

      unsigned NewOpc = IsFMA
                            ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
                            : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? (IsF16 ? AMDGPU::V_FMA_F16_e64
                             : IsF64 ? AMDGPU::V_FMA_F64_e64
                                     : AMDGPU::V_FMA_F32_e64)
                          : (IsF16 ? AMDGPU::V_MAD_F16_e64 : AMDGPU::V_MAD_F32_e64);
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||

         Opcode == AMDGPU::DS_GWS_INIT ||
         Opcode == AMDGPU::DS_GWS_SEMA_V ||
         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
         Opcode == AMDGPU::DS_GWS_SEMA_P ||
         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
         Opcode == AMDGPU::DS_GWS_BARRIER;

  if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
    for (; ImpDef && *ImpDef; ++ImpDef) {
      if (*ImpDef == AMDGPU::MODE)

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
  int64_t Imm = MO.getImm();

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);

                     AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                    AMDGPU::OpName::src0_modifiers) != -1;

  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {

  if (Op32DstIdx != -1) {
    Inst32.add(MI.getOperand(0));
    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

  if (MO.getReg() == AMDGPU::SGPR_NULL)

         MO.getReg() == AMDGPU::VCC ||
         MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
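// Constant-bus accounting appears to work as follows: SGPR_NULL is exempt,
// VCC/VCC_LO uses count, and otherwise any operand in a 32- or 64-bit SGPR
// class occupies the constant bus.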
    switch (MO.getReg()) {
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::FLAT_SCR:

  return AMDGPU::NoRegister;

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_WRITELANE_B32:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))

    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from MIMG instruction.";

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

      const bool IsVGPR = RI.hasVGPRs(RC);
      const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
      if ((IsVGPR || IsAGPR) && MO.getSubReg()) {

          ErrInfo = "Subtarget requires even aligned vector registers";

      if (RegClass != -1) {
        if (Reg.isVirtual())

          ErrInfo = "Operand has incorrect register class.";

      ErrInfo = "SDWA is not supported on this target";

    const int OpIndicies[] = { DstIdx, Src0Idx, Src1Idx, Src2Idx };
    for (int OpIdx : OpIndicies) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

          "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (isVOPC(BasicOpcode)) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

            "Dst register should be tied to implicit use of preserved register";

        ErrInfo = "Dst register should use same physical register as preserved";

  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
      uint64_t DMaskImm = DMask->getImm();

      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "MIMG instruction returns too many registers for dst "

  if (Desc.getOpcode() != AMDGPU::V_WRITELANE_B32

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

    for (int OpIdx : OpIndices) {

        if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) {
              return SGPRUsed != SGPR;
          SGPRsUsed.push_back(SGPRUsed);

          ErrInfo = "VOP3 instruction uses more than one literal";

    if (SGPRUsed != AMDGPU::NoRegister) {
      if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
            return !RI.regsOverlap(SGPRUsed, SGPR);
        SGPRsUsed.push_back(SGPRUsed);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;
    Register SGPRUsed = AMDGPU::NoRegister;

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {

        if (MO.getReg() != SGPRUsed)

          ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {

      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

        ErrInfo = "ABS not allowed in VOP3B instructions";

    unsigned Immediates = 0;

    if (!Src0.isReg() &&
    if (!Src1.isReg() &&

    if (Immediates > 1) {
      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

      ErrInfo = "invalid branch target for SOPK instruction";

      uint64_t Imm = Op->getImm();

        ErrInfo = "invalid immediate for SOPK instruction";

        ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

    if (MI.mayStore()) {
        ErrInfo = "scalar stores must use m0 as offset register";

      if (Offset->getImm() != 0) {
        ErrInfo = "subtarget does not support offsets in flat instructions";

                                           AMDGPU::OpName::vaddr0);

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;

      IsA16 = A16->getImm() != 0;

    bool PackDerivatives = IsA16 || BaseOpcode->G16;
    bool IsNSA = SRsrcIdx - VAddr0Idx > 1;

      AddrWords += (AddrComponents + 1) / 2;

      AddrWords += AddrComponents;

    if (PackDerivatives)

    unsigned VAddrWords;
      VAddrWords = SRsrcIdx - VAddr0Idx;

      else if (AddrWords > 4)
      else if (AddrWords == 4)
      else if (AddrWords == 3)

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";

      ErrInfo = "Invalid dpp_ctrl value";

      ErrInfo = "Invalid dpp_ctrl value: "
                "wavefront shifts are not supported on GFX10+";

      ErrInfo = "Invalid dpp_ctrl value: "
                "broadcasts are not supported on GFX10+";

        !ST.hasGFX90AInsts()) {
      ErrInfo = "Invalid dpp_ctrl value: "
                "row_newbroadcast/row_share is not supported before "

      ErrInfo = "Invalid dpp_ctrl value: "
                "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
          Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64_Align2RegClassID)) ||
              AMDGPU::VReg_64_Align2RegClassID)))) &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "64 bit dpp only support row_newbcast";

    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;

      ErrInfo = "Invalid register class: "
                "vdata and vdst should be both VGPR or AGPR";

    if (Data && Data2 &&
      ErrInfo = "Invalid register class: "
                "both data operands should be VGPR or AGPR";

      ErrInfo = "Invalid register class: "
                "agpr loads and stores not supported on this GPU";
  switch (MI.getOpcode()) {
  default:
    return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
               AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;

      "Unexpected scalar opcode without corresponding vector one!");
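// The table above maps each scalar (SALU) opcode to the VALU opcode used when
// an instruction has to be moved to the vector pipeline; entries returning
// INSTRUCTION_LIST_END (e.g. S_XNOR_B32 without DL instructions) have no
// direct vector equivalent, and the "Unexpected scalar opcode" message above
// guards against unmapped cases.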
                                 bool IsAllocatable) {
  case AMDGPU::AV_32RegClassID: return AMDGPU::VGPR_32RegClassID;
  case AMDGPU::AV_64RegClassID: return AMDGPU::VReg_64RegClassID;
  case AMDGPU::AV_96RegClassID: return AMDGPU::VReg_96RegClassID;
  case AMDGPU::AV_128RegClassID: return AMDGPU::VReg_128RegClassID;
  case AMDGPU::AV_160RegClassID: return AMDGPU::VReg_160RegClassID;

  bool IsAllocatable = false;

                                         AMDGPU::OpName::vdst);
                               : AMDGPU::OpName::vdata);
    if (DataIdx != -1) {
      IsAllocatable = VDstIdx != -1 ||
                                       AMDGPU::OpName::data1) != -1;

                                  unsigned OpNo) const {

  if (Reg.isVirtual())

  unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass;

  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO : AMDGPU::V_MOV_B32_e32;
    Opcode = AMDGPU::COPY;
    Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;

    if (RI.getCommonSubClass(VRC64, VRC))
      VRC = &AMDGPU::VGPR_32RegClass;

  if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) {

      .addReg(NewSuperReg, 0, SubIdx);

  if (SubIdx == AMDGPU::sub0)
  if (SubIdx == AMDGPU::sub1)

void SIInstrInfo::swapOperands(MachineInstr &Inst) const {

  if (Reg.isPhysical())

    DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());

    MO = &MI.getOperand(OpIdx);

  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {

      if (!SGPRsUsed.count(SGPR) &&
        if (--ConstantBusLimit <= 0)

      if (--ConstantBusLimit <= 0)

      if (!VOP3LiteralLimit--)
        if (--ConstantBusLimit <= 0)

  unsigned Opc = MI.getOpcode();

                 isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
    if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
        MI.getOperand(DataIdx).isReg() &&
        RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)

    if ((int)OpIdx == DataIdx) {
      if (VDstIdx != -1 &&
          RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)

                                                 AMDGPU::OpName::data1);
      if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
          RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)

    if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&

  unsigned Opc = MI.getOpcode();

  if (Opc == AMDGPU::V_WRITELANE_B32) {