#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),
  unsigned N = Node->getNumOperands();

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.hasImplicitDef() &&
         MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
         !MI.mayRaiseFPException();
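// Only instructions with no implicit defs, no extra implicit operands beyond
// the descriptor's implicit uses, and no possible FP exceptions are treated as
// trivially rematerializable here.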
  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:

    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:
                                            int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
    assert(NumOps == 4 || NumOps == 5);

        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();
  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,

  OffsetIsScalable = false;

      unsigned Offset0 = Offset0Op->getImm();
      unsigned Offset1 = Offset1Op->getImm();
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;
    if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
    if (MO1->getAddrSpace() != MO2->getAddrSpace())

    auto Base1 = MO1->getValue();
    auto Base2 = MO2->getValue();
    if (!Base1 || !Base2)

    if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

    return Base1 == Base2;

                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / NumLoads;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
  return NumDWORDs <= 8;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
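// Clustering heuristics: memory ops are only clustered while their combined
// width stays within 8 dwords, and loads are scheduled "near" each other when
// at most 16 of them fall within a 64-unit offset range.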
                              const char *Msg = "illegal VGPR to SGPR copy") {

  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");
    if (!Def->definesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)

      bool SafeToPropagate = true;

      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;

         "VGPR used for an intermediate copy should have been reserved.");

  while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,
    int16_t SubIdx = BaseIndices[Idx];
    Register Reg = RI.getSubReg(DestReg, SubIdx);
    unsigned Opcode = AMDGPU::S_MOV_B32;

    Register Src = RI.getSubReg(SrcReg, SubIdx);
    bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {

      Opcode = AMDGPU::S_MOV_B64;

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
                 .addReg(RI.getSubReg(SrcReg, SubIdx))

  assert(FirstMI && LastMI);

    LastMI->addRegisterKilled(SrcReg, &RI);
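// SGPR tuple copies are emitted 32 bits at a time, widened to S_MOV_B64 when
// both the source and destination subregisters are 64-bit aligned; the source
// super-register is marked killed on the final move.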
      ((RI.getRegSizeInBits(*RC) == 16) ^
       (RI.getRegSizeInBits(*RI.getPhysRegBaseClass(SrcReg)) == 16))) {
    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;

    assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);

    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                   AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {
  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

  const unsigned Size = RI.getRegSizeInBits(*RC);

           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;

      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;

      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      Opcode = AMDGPU::INSTRUCTION_LIST_END;

    Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;

      Opcode = AMDGPU::V_MOV_B64_e32;

      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];

      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                        RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap,
                        ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
      Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
      Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);

          .addReg(RI.getSubReg(SrcReg, SubIdx));
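// Wide register-tuple copies are split into per-subregister moves (V_MOV_B32,
// the ACCVGPR variants for AGPRs, or V_PK_MOV_B32/V_MOV_B64 where the subtarget
// has them). The INSTRUCTION_LIST_END sentinel marks AGPR copies that have no
// direct move opcode and must go through indirectCopyToAGPR with a scavenged
// VGPR as a temporary.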
                                     int64_t Value) const {

  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;

      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;

            get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);

  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);

    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                            : AMDGPU::S_CSELECT_B64), SReg)

    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                            : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                            : AMDGPU::S_CSELECT_B64), SReg)
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;

  return AMDGPU::COPY;

                                              bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    if (VecSize <= 1024)
      return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    if (VecSize <= 1024)
      return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
    if (VecSize <= 1024)
      return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
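// The indirect read/write pseudos above are keyed by the total vector size in
// bits; the elided guards between the returns select the smallest variant that
// fits (V1..V32 for 32-bit elements, V1..V16 for 64-bit SGPR writes).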
                                       bool IsSGPR) const {

  assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A288_SAVE;
    return AMDGPU::SI_SPILL_A320_SAVE;
    return AMDGPU::SI_SPILL_A352_SAVE;
    return AMDGPU::SI_SPILL_A384_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
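// Spill-save pseudos are chosen by spill size, with separate families for SGPR
// (SI_SPILL_S*), VGPR (SI_SPILL_V*), AGPR (SI_SPILL_A*) and combined AV
// (SI_SPILL_AV*) register classes; the size checks between the returns are
// elided here.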
      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

  assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A288_RESTORE;
    return AMDGPU::SI_SPILL_A320_RESTORE;
    return AMDGPU::SI_SPILL_A352_RESTORE;
    return AMDGPU::SI_SPILL_A384_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;
  unsigned SpillSize = TRI->getSpillSize(*RC);

      FrameInfo.getObjectAlign(FrameIndex));

  assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
                              unsigned Quantity) const {

  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);
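    // Each S_NOP can cover at most 8 wait states, so longer sequences are
    // emitted in chunks of up to 8.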
  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  switch (MI.getOpcode()) {

    if (MI.isMetaInstruction())

    return MI.getOperand(0).getImm() + 1;
  switch (MI.getOpcode()) {

  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));

  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));

  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));

  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));

  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));

  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));

  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));

  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));

  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));

  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
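  // The *_term pseudos used as terminators during control-flow lowering are
  // rewritten back to their ordinary scalar opcodes when expanded here.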
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

        .add(MI.getOperand(1));

        .add(MI.getOperand(2));

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;

            MI.getOperand(0).getReg())
        .add(MI.getOperand(1));

            MI.getOperand(0).getReg())
        .add(MI.getOperand(2));

    MI.eraseFromParent();
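  // V_SET_INACTIVE copies its first source into the active lanes and its
  // second source into the inactive lanes by temporarily inverting EXEC
  // around the second move.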
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {

      Opc = AMDGPU::V_MOVRELD_B32_e32;

      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {

    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

                   .add(MI.getOperand(1)));

      MIB.add(MI.getOperand(2));

    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                            : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::ENTER_PSEUDO_WM:
  case AMDGPU::EXIT_PSEUDO_WM: {
    MI.eraseFromParent();

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

std::pair<MachineInstr*, MachineInstr*>

  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);
  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());

        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);
                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {

         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();

  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())

  else if (NonRegOp.isFI())
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();

  if (CommutedOpcode == -1)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {

  assert(BranchOp != AMDGPU::S_SETPC_B64);

  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
    return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO ||
      MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)
  assert(RS && "RegScavenger required for long branching");

         "new block should be inserted for expanding unconditional branch");

         "restore block should be inserted for restoring clobbered registers");

  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

      MCCtx.createTempSymbol("post_getpc", true);

      MCCtx.createTempSymbol("offset_lo", true);

      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)

      .addReg(PCReg, 0, AMDGPU::sub1)

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

  TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
  MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
  MRI.clearVirtRegs();
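// Long branches are expanded as S_GETPC_B64 plus a 64-bit add of the label
// offset (split into lo/hi halves) followed by an indirect S_SETPC_B64. The PC
// register pair is either an SGPR pair found by the register scavenger or, as
// a last resort, SGPR0_SGPR1 preserved via spillEmergencySGPR.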
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:
                                   bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();

    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();
                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:

    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {

  unsigned RemovedSize = 0;

    if (MI.isBranch() || MI.isReturn()) {

      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;
                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {

    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;

  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;

      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);

          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
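  // Wide selects are decomposed per element (V_CNDMASK_B32 for VGPRs,
  // S_CSELECT_B32/B64 for an SCC condition) and reassembled with REG_SEQUENCE.
  // Note the operand order: V_CNDMASK takes (false, true) while S_CSELECT
  // takes (true, false).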
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:

  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();

      MI.removeOperand(Idx);
  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {

  case AMDGPU::S_MOV_B64:

  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {

    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;

    if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() &&
          UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);

      UseMI.getOperand(0).setReg(DstReg);

    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_t16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_t16_e64;

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      const int64_t Imm = ImmOp->getImm();

      unsigned Src1SubReg = Src1->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);

        DefMI.eraseFromParent();
    bool Src0Inlined = false;
    if (Src0->isReg()) {

      if (Def && Def->isMoveImmediate() &&

    if (Src1->isReg() && !Src0Inlined) {
      if (Def && Def->isMoveImmediate() &&

          commuteInstruction(UseMI)) {

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      const int64_t Imm = ImmOp->getImm();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);

        DefMI.eraseFromParent();
  if (BaseOps1.size() != BaseOps2.size())

  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

                                int WidthB, int OffsetB) {
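  // Two accesses are disjoint when the lower one ends at or before the higher
  // one begins.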
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,

  int64_t Offset0, Offset1;
  unsigned Dummy0, Dummy1;
  bool Offset0IsScalable, Offset1IsScalable;

  unsigned Width0 = MIa.memoperands().front()->getSize();
  unsigned Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
  if (Reg.isPhysical())

  auto *Def = MRI.getUniqueVRegDef(Reg);

    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {

    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();

  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

  assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
         "V_FMAC_F16_t16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:

  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                      AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&]() -> void {

    if (!MRI.hasOneNonDBGUse(DefReg))

                       : AMDGPU::V_FMAAK_F16)
                       : AMDGPU::V_FMAAK_F32)
              : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                       : AMDGPU::V_FMAMK_F16)
                       : AMDGPU::V_FMAMK_F32)
              : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                            : IsF64 ? AMDGPU::V_FMA_F64_e64
                                    ? AMDGPU::V_FMA_LEGACY_F32_e64
                                    : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                        : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                   : AMDGPU::V_MAD_F32_e64;

  MIB.addImm(OpSel ? OpSel->getImm() : 0);
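// Two-address MAC/FMAC forms are rewritten here to three-address MAD/FMA
// variants (or to the FMAAK/FMAMK and MADAK/MADMK forms when an operand is a
// foldable literal), with the specific opcode selected by operand type
// (F16/F32/F64/legacy).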
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
         Opcode == AMDGPU::DS_GWS_INIT ||
         Opcode == AMDGPU::DS_GWS_SEMA_V ||
         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
         Opcode == AMDGPU::DS_GWS_SEMA_P ||
         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
         Opcode == AMDGPU::DS_GWS_BARRIER;

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {

    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);
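// Instructions whose effects persist even with no active lanes (sendmsg,
// traps, GWS / ordered-count DS ops, readlane/writelane, calls and inline asm)
// are reported as having unwanted side effects when EXEC is empty.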
  switch (Imm.getBitWidth()) {

                                   uint8_t OperandType) const {
  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                                 AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();
  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_t16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {

    Inst32.add(MI.getOperand(0));

    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

    if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

      return MO.getReg() == AMDGPU::M0 ||
             MO.getReg() == AMDGPU::VCC ||
             MO.getReg() == AMDGPU::VCC_LO;

    return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
           AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {

  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:

  case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_WRITELANE_B32:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))
  if (Src0Idx == -1) {

    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {

      if (!Reg.isVirtual() && !RC->contains(Reg)) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from MIMG instruction.";

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

    int RegClass = Desc.operands()[i].RegClass;

    switch (Desc.operands()[i].OperandType) {

      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

              RI.getSubRegisterClass(RC, MO.getSubReg());

          ErrInfo = "Subtarget requires even aligned vector registers";

    if (RegClass != -1) {
      if (Reg.isVirtual())

        ErrInfo = "Operand has incorrect register class.";

      ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {

          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";

                    "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (isVOPC(BasicOpcode)) {

      if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
        ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

      if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
        ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

      if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
        ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

    if (DstUnused && DstUnused->isImm() &&

      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));

            "Dst register should be tied to implicit use of preserved register";

          Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";

  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {

      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "Image instruction returns too many registers for dst "
    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {

            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

          if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
                return !RI.regsOverlap(SGPRUsed, SGPR);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";
    if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
      unsigned SGPRCount = 0;

      for (int OpIdx : {Src0Idx, Src1Idx}) {

          if (MO.getReg() != SGPRUsed)

            ErrInfo = "WRITELANE instruction violates constant bus restriction";

    if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
        Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {

        ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

        ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";
      ErrInfo = "invalid branch target for SOPK instruction";

      if (!isUInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";

      if (!isInt<16>(Imm)) {
        ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
    const unsigned StaticNumOps =
    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);

        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

  if (MI.mayStore() &&

    if (Soff && Soff->getReg() != AMDGPU::M0) {
      ErrInfo = "scalar stores must use m0 as offset register";

    if (Offset->getImm() != 0) {
      ErrInfo = "subtarget does not support offsets in flat instructions";
                                           AMDGPU::OpName::vaddr0);

      ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
    } else if (ST.hasA16()) {
      IsA16 = A16->getImm() != 0;

    bool IsNSA = SRsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =

    unsigned VAddrWords;

      VAddrWords = SRsrcIdx - VAddr0Idx;

      unsigned LastVAddrIdx = SRsrcIdx - 1;
      VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";
    using namespace AMDGPU::DPP;

    unsigned DC = DppCt->getImm();
    if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
        DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
        (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
        (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
      ErrInfo = "Invalid dpp_ctrl value";

    if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&