#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"

                 cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),

  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.hasImplicitDef() &&
         MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
         !MI.mayRaiseFPException();

  if (MI.isCompare()) {

      switch (Use.getOpcode()) {
      case AMDGPU::S_AND_SAVEEXEC_B32:
      case AMDGPU::S_AND_SAVEEXEC_B64:
      case AMDGPU::S_AND_B32:
      case AMDGPU::S_AND_B64:
        if (!Use.readsRegister(AMDGPU::EXEC))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

                                          int64_t &Offset1) const {
  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;
    Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();

        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
        dyn_cast<ConstantSDNode>(Load1->getOperand(1));

    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

    Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue();
    Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue();

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
  OffsetIsScalable = false;

      BaseOps.push_back(BaseOp);
      Offset = OffsetOp->getImm();

      unsigned Offset0 = Offset0Op->getImm();
      unsigned Offset1 = Offset1Op->getImm();
      if (Offset0 + 1 != Offset1)

      BaseOps.push_back(BaseOp);
      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

      BaseOps.push_back(RSrc);
      if (BaseOp && !BaseOp->isFI())
        BaseOps.push_back(BaseOp);
      Offset = OffsetImm->getImm();
      if (SOffset->isReg())
        BaseOps.push_back(SOffset);
      Offset += SOffset->getImm();

    BaseOps.push_back(&LdSt.getOperand(SRsrcIdx));
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    BaseOps.push_back(BaseOp);
    Offset = OffsetOp ? OffsetOp->getImm() : 0;

    BaseOps.push_back(BaseOp);
    BaseOps.push_back(BaseOp);

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  auto Base1 = MO1->getValue();
  auto Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      unsigned NumBytes) const {
  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {

  const unsigned LoadSize = NumBytes / NumLoads;
  const unsigned NumDWORDs = ((LoadSize + 3) / 4) * NumLoads;
  return NumDWORDs <= 8;
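// Clustering heuristic, as the code above shows: NumBytes is averaged per
// load, rounded up to whole dwords, and the cluster is accepted only while
// the combined footprint stays at or below 8 dwords. Illustrative
// (hypothetical) values: four dword loads give NumDWORDs = 4 and cluster;
// four 128-bit loads give NumDWORDs = 16 and do not.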
                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal SGPR to VGPR copy") {

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");

         "Destination register of the copy should be an AGPR.");

      if (!Def->definesRegister(SrcReg, &RI))
      if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)

      bool SafeToPropagate = true;
      for (auto I = Def; I != MI && SafeToPropagate; ++I)
        if (I->modifiesRegister(DefOp.getReg(), &RI))
          SafeToPropagate = false;

      if (!SafeToPropagate)

      if (ImpUseSuperReg) {

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
         "VGPR used for an intermediate copy should have been reserved.");

  while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

  for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
    int16_t SubIdx = BaseIndices[Idx];
    unsigned Opcode = AMDGPU::S_MOV_B32;

    Register Src = RI.getSubReg(SrcReg, SubIdx);
    bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      Opcode = AMDGPU::S_MOV_B64;

    LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
                 .addReg(RI.getSubReg(SrcReg, SubIdx))

  assert(FirstMI && LastMI);

  LastMI->addRegisterKilled(SrcReg, &RI);
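// The SGPR copy expansion above walks the 32-bit sub-registers of the copy
// and, when both the source and destination pieces happen to be even aligned,
// opportunistically widens the move to S_MOV_B64; the kill of the source
// super-register is attached to the last move emitted.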
      ((RI.getRegSizeInBits(*RC) == 16) ^
    MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;

    if (DestReg == SrcReg) {

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

  const unsigned Size = RI.getRegSizeInBits(*RC);
           AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
    bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
                  AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg on VI!");

    const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
  } else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];

    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                         RI.getSubReg(SrcReg, SubIdx), UseKill, *RS,
                         ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
      Register DstSubReg = RI.getSubReg(DestReg, SubIdx);
      Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
          .addReg(RI.getSubReg(SrcReg, SubIdx));
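// Wide vector copies are lowered one 32-bit sub-register at a time. SubIdx is
// taken either from the front or the back of SubIndices so an overlapping
// source is not clobbered before it is read. GFX908 AGPR copies that need a
// temporary take the indirectCopyToAGPR() path (marked by
// INSTRUCTION_LIST_END above), and subtargets with packed moves can use
// V_PK_MOV_B32 for 64-bit chunks.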
                                     int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

  for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
    int64_t IdxValue = Idx == 0 ? Value : 0;
                          get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));

  return &AMDGPU::VGPR_32RegClass;

         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
    case SIInstrInfo::VCCZ: {
    case SIInstrInfo::EXECNZ: {
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                              : AMDGPU::S_CSELECT_B64), SReg)

    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 32) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC)) {
    return AMDGPU::S_MOV_B64;
  } else if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC)) {
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                                   bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                                bool IsSGPR) const {
  assert(EltSize == 32 && "invalid reg indexing elt size");

    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;
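// Spill pseudo selection above is purely size-driven: the byte size of the
// register class picks the SI_SPILL_{S,V,A,AV}<bits>_SAVE pseudo, e.g. a
// 4-byte SGPR spill uses SI_SPILL_S32_SAVE and a 64-byte AGPR tuple uses
// SI_SPILL_A512_SAVE. The matching *_RESTORE opcodes further down follow the
// same pattern.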
  assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
         SrcReg != AMDGPU::EXEC && "exec should not be spilled");

  if (SrcReg.isVirtual() && SpillSize == 4) {

    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

  assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
         DestReg != AMDGPU::EXEC && "exec should not be spilled");

  if (DestReg.isVirtual() && SpillSize == 4) {

                             unsigned Quantity) const {
  while (Quantity > 0) {

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;

  case AMDGPU::SI_MASKED_UNREACHABLE:
  case AMDGPU::WAVE_BARRIER:
  case AMDGPU::SCHED_BARRIER:

  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());
            .addImm(Lo.getSExtValue())
            .addImm(Lo.getSExtValue())
            .addImm(Lo.getSExtValue())
            .addImm(Hi.getSExtValue())
    MI.eraseFromParent();
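  // The next two pseudos follow the same pattern as V_MOV_B64_PSEUDO above:
  // V_MOV_B64_DPP_PSEUDO defers to expandMovDPP64(), and S_MOV_B64_IMM_PSEUDO
  // either becomes a plain S_MOV_B64 or is split into two 32-bit moves of the
  // low and high immediate halves into sub0/sub1.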
  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());
        .addImm(Lo.getSExtValue())
        .addImm(Hi.getSExtValue())
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B32: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
        .add(MI.getOperand(1));
        .add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::V_SET_INACTIVE_B64: {
    unsigned NotOpc = ST.isWave32() ? AMDGPU::S_NOT_B32 : AMDGPU::S_NOT_B64;
    unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
                                 MI.getOperand(0).getReg())
                   .add(MI.getOperand(1));
                                 MI.getOperand(0).getReg())
        .add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());
            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))
    const int ImpUseIdx = ImpDefIdx + 1;
    MI.eraseFromParent();
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();
    MI.eraseFromParent();
  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
            .add(MI.getOperand(1)));
      MIB.add(MI.getOperand(2));
    MI.eraseFromParent();
  case AMDGPU::ENTER_STRICT_WWM: {
                               : AMDGPU::S_OR_SAVEEXEC_B64));
  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MI.eraseFromParent();
  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::make_pair(&MI, nullptr);

  for (auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));

    for (unsigned I = 3; I < MI.getNumExplicitOperands(); ++I)
      MovDPP.addImm(MI.getOperand(I).getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::make_pair(Split[0], Split[1]);
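// expandMovDPP64() above either renames the pseudo to a real V_MOV_B64_dpp
// when the subtarget has one, or emits two 32-bit DPP movs over sub0/sub1,
// shifting an immediate source right by 32 bits for the high half and copying
// the trailing DPP control operands unchanged.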
                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())

                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                        Src1, AMDGPU::OpName::src1_modifiers);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx1) const {

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                     int64_t BrOffset) const {
  assert(BranchOp != AMDGPU::S_SETPC_B64);

  if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {

  return MI.getOperand(0).getMBB();

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);
      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)

  TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
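// Long branches are expanded around S_GETPC_B64: the PC is materialized and
// the branch distance is added as a (lo, hi) pair using the MC temp symbols
// above (post_getpc / offset_lo / offset_hi), which are resolved at emission
// time, before the result is fed to S_SETPC_B64. When no free SGPR pair can
// be scavenged, the code falls back to spillEmergencySGPR as shown.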
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  if (I->getOpcode() == AMDGPU::SI_NON_UNIFORM_BRCOND_PSEUDO) {
    CondBB = I->getOperand(1).getMBB();
    Cond.push_back(I->getOperand(0));

    BranchPredicate Pred = getBranchPredicate(I->getOpcode());
    if (Pred == INVALID_BR)

    CondBB = I->getOperand(0).getMBB();
    Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {

  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

  if (Cond.size() == 1 && Cond[0].isReg()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                  Register FalseReg, int &CondCycles,
                                  int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (NumInsts % 2 == 0)
    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);
  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Regs.push_back(DstElt);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
          .addReg(FalseReg, 0, SubIdx)
          .addReg(TrueReg, 0, SubIdx);
          .addReg(TrueReg, 0, SubIdx)
          .addReg(FalseReg, 0, SubIdx);
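// Wide selects are decomposed element-wise above: V_CNDMASK_B32 per 32-bit
// piece for divergent conditions, or S_CSELECT_B32/B64 pieces when the
// condition is SCC, with the results reassembled through a REG_SEQUENCE.
// Note that the operand order differs between the two forms (V_CNDMASK takes
// false then true, S_CSELECT takes true then false).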
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::omod};

  unsigned Opc = MI.getOpcode();

  switch (DefMI.getOpcode()) {
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
    if (UseMI.getOperand(1).getSubReg() == AMDGPU::hi16)
      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

    UseMI.getOperand(0).setSubReg(0);
      UseMI.getOperand(0).setReg(DstReg);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
    bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64;

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      unsigned Src1SubReg = Src1->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

        DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&
            commuteInstruction(UseMI)) {

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (Opc == AMDGPU::V_MAC_F32_e64 ||
          Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      DefMI.eraseFromParent();
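// Immediate folding into MAD/FMA above: when the folded constant feeds a
// multiplicand the instruction can be rewritten to the MADMK/FMAMK form, and
// when it feeds the addend to MADAK/FMAAK; the MAC/FMAC variants first drop
// the accumulator tie via untieRegOperand, and the defining move is erased
// once its value has been folded.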
  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

                                int WidthB, int OffsetB) {
  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
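// The check above proves two accesses disjoint when the lower-addressed one
// ends at or before the higher one starts, i.e. LowOffset + LowWidth <=
// HighOffset (for example, offset 0 with width 4 does not overlap offset 4).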
bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  unsigned Dummy0, Dummy1;
  bool Offset0IsScalable, Offset1IsScalable;

  unsigned Width0 = MIa.memoperands().front()->getSize();
  unsigned Width1 = MIb.memoperands().front()->getSize();

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();
  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

  const auto killDef = [&DefMI, &MBB, this]() -> void {

          IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
                : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

      unsigned NewOpc = IsFMA
                            ? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
                            : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                                  : IsF64 ? AMDGPU::V_FMA_F64_e64
                                    : IsLegacy
                                        ? AMDGPU::V_FMA_LEGACY_F32_e64
                                        : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                                  : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                             : AMDGPU::V_MAD_F32_e64;

          .addImm(Clamp ? Clamp->getImm() : 0)
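// convertToThreeAddress: two-address MAC/FMAC instructions are rewritten into
// the three-address MAD/FMA VOP3 opcode selected just above; when an operand
// is a foldable immediate the MADMK/MADAK (or FMAMK/FMAAK) shortcuts are used
// instead, and MFMA instructions take the separate NewMFMAOpc remapping path
// near the top of the function.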
  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||

         Opcode == AMDGPU::DS_GWS_INIT ||
         Opcode == AMDGPU::DS_GWS_SEMA_V ||
         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
         Opcode == AMDGPU::DS_GWS_SEMA_P ||
         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
         Opcode == AMDGPU::DS_GWS_BARRIER;

  if (const MCPhysReg *ImpDef = MI.getDesc().getImplicitDefs()) {
    for (; ImpDef && *ImpDef; ++ImpDef) {
      if (*ImpDef == AMDGPU::MODE)

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);

  switch (Imm.getBitWidth()) {

    int32_t Trunc = static_cast<int32_t>(Imm);

    int16_t Trunc = static_cast<int16_t>(Imm);

                                  AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

                                    AMDGPU::OpName::src0_modifiers) != -1;

  return Mods && Mods->getImm();

  switch (MI.getOpcode()) {
  default:
    return false;

  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:

  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {
  Inst32.add(MI.getOperand(0));

    assert(((MI.getOperand(0).getReg() == AMDGPU::VCC) ||
            (MI.getOperand(0).getReg() == AMDGPU::VCC_LO)) &&

  if (Op32Src2Idx != -1) {

  if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

         MO.getReg() == AMDGPU::VCC ||
         MO.getReg() == AMDGPU::VCC_LO;

  return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
         AMDGPU::SReg_64RegClass.contains(MO.getReg());
  switch (MO.getReg()) {
  case AMDGPU::VCC_LO:
  case AMDGPU::VCC_HI:
  case AMDGPU::FLAT_SCR:

  return AMDGPU::NoRegister;

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::V_WRITELANE_B32:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

  if (SubReg.getReg().isPhysical())

  return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

  if (SIInstrInfo::isGenericOpcode(MI.getOpcode()))

    ErrInfo = "Instruction has wrong number of operands.";

  if (MI.isInlineAsm()) {
        ErrInfo = "inlineasm operand has incorrect register class.";

  if (isMIMG(MI) && MI.memoperands_empty() && MI.mayLoadOrStore()) {
    ErrInfo = "missing memory operand from MIMG instruction.";

      ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast "
                "all fp values to integers.";

      if (MI.getOperand(i).isImm() || MI.getOperand(i).isGlobal()) {
        ErrInfo = "Illegal immediate value for operand.";

        ErrInfo = "Illegal immediate value for operand.";

      if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) {
        ErrInfo = "Expected immediate, but got non-immediate";

          ErrInfo = "Subtarget requires even aligned vector registers";

      if (RegClass != -1) {
        if (Reg.isVirtual())
          ErrInfo = "Operand has incorrect register class.";

      ErrInfo = "SDWA is not supported on this target";

    for (int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
          ErrInfo = "Only VGPRs allowed as operands in SDWA instructions on VI";
              "Only reg allowed as operands in SDWA instructions on GFX9+";

      if (OMod != nullptr &&
        ErrInfo = "OMod not allowed in SDWA instructions on VI";

    if (isVOPC(BasicOpcode)) {
        if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
          ErrInfo = "Only VCC allowed as dst in SDWA instructions on VI";

        if (Clamp && (!Clamp->isImm() || Clamp->getImm() != 0)) {
          ErrInfo = "Clamp not allowed in VOPC SDWA instructions on VI";

        if (OMod && (!OMod->isImm() || OMod->getImm() != 0)) {
          ErrInfo = "OMod not allowed in VOPC SDWA instructions on VI";

      if (!Dst.isReg() || !Dst.isTied()) {
        ErrInfo = "Dst register should have tied register";

          MI.getOperand(MI.findTiedOperandIdx(DstIdx));
            "Dst register should be tied to implicit use of preserved register";
                 Dst.getReg() != TiedMO.getReg()) {
        ErrInfo = "Dst register should use same physical register as preserved";

  if (isMIMG(MI.getOpcode()) && !MI.mayStore()) {
      uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
      if (RegCount > DstSize) {
        ErrInfo = "MIMG instruction returns too many registers for dst "

    unsigned ConstantBusCount = 0;
    bool UsesLiteral = false;

      LiteralVal = &MI.getOperand(ImmIdx);

    for (int OpIdx : {Src0Idx, Src1Idx, Src2Idx}) {
          if (llvm::all_of(SGPRsUsed, [SGPRUsed](unsigned SGPR) {
                return SGPRUsed != SGPR;
            SGPRsUsed.push_back(SGPRUsed);

            ErrInfo = "VOP2/VOP3 instruction uses more than one literal";

      if (SGPRUsed != AMDGPU::NoRegister) {
        if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
              return !RI.regsOverlap(SGPRUsed, SGPR);
          SGPRsUsed.push_back(SGPRUsed);

        Opcode != AMDGPU::V_WRITELANE_B32) {
      ErrInfo = "VOP* instruction violates constant bus restriction";

      ErrInfo = "VOP3 instruction uses literal";

  if (Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
    unsigned SGPRCount = 0;
    Register SGPRUsed = AMDGPU::NoRegister;

    for (int OpIdx : {Src0Idx, Src1Idx}) {
        if (MO.getReg() != SGPRUsed)
          ErrInfo = "WRITELANE instruction violates constant bus restriction";

  if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
      Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
      ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";

        ErrInfo = "ABS not allowed in VOP3B instructions";

      ErrInfo = "SOP2/SOPC instruction requires too many immediate constants";

        ErrInfo = "invalid branch target for SOPK instruction";

          ErrInfo = "invalid immediate for SOPK instruction";
          ErrInfo = "invalid immediate for SOPK instruction";

  if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
      Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
    const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
                       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;

    const unsigned NumImplicitOps = IsDst ? 2 : 1;

    if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
      ErrInfo = "missing implicit register operands";

      if (!Dst->isUse()) {
        ErrInfo = "v_movreld_b32 vdst should be a use operand";

      if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
          UseOpIdx != StaticNumOps + 1) {
        ErrInfo = "movrel implicit operands should be tied";

        = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
        !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
      ErrInfo = "src0 should be subreg of implicit vector use";

    if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
      ErrInfo = "VALU instruction does not implicitly read exec mask";

    if (MI.mayStore()) {
        ErrInfo = "scalar stores must use m0 as offset register";

      if (Offset->getImm() != 0) {
        ErrInfo = "subtarget does not support offsets in flat instructions";

                                             AMDGPU::OpName::vaddr0);
        ErrInfo = "dim is out of range";

      IsA16 = R128A16->getImm() != 0;
      IsA16 = A16->getImm() != 0;

    bool IsNSA = SRsrcIdx - VAddr0Idx > 1;

    unsigned AddrWords =
    unsigned VAddrWords;
      VAddrWords = SRsrcIdx - VAddr0Idx;

    if (VAddrWords != AddrWords) {
                        << " but got " << VAddrWords << "\n");
      ErrInfo = "bad vaddr size";

      ErrInfo = "Invalid dpp_ctrl value";

        ErrInfo = "Invalid dpp_ctrl value: "
                  "wavefront shifts are not supported on GFX10+";

        ErrInfo = "Invalid dpp_ctrl value: "
                  "broadcasts are not supported on GFX10+";

          !ST.hasGFX90AInsts()) {
        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_newbroadcast/row_share is not supported before "

        ErrInfo = "Invalid dpp_ctrl value: "
                  "row_share and row_xmask are not supported before GFX10";

    if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
           Desc.OpInfo[DstIdx].RegClass == AMDGPU::VReg_64_Align2RegClassID)) ||
                 AMDGPU::VReg_64_Align2RegClassID)))) &&
      ErrInfo = "Invalid dpp_ctrl value: "
                "64 bit dpp only support row_newbcast";

    uint16_t DataNameIdx = isDS(Opcode) ? AMDGPU::OpName::data0
                                        : AMDGPU::OpName::vdata;
        ErrInfo = "Invalid register class: "
                  "vdata and vdst should be both VGPR or AGPR";

      if (Data && Data2 &&
        ErrInfo = "Invalid register class: "
                  "both data operands should be VGPR or AGPR";

      if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
        ErrInfo = "Invalid register class: "
                  "agpr loads and stores not supported on this GPU";

    const auto isAlignedReg = [&MI, &MRI, this](unsigned OpName) -> bool {
      if (Reg.isPhysical())

    if (MI.getOpcode() == AMDGPU::DS_GWS_INIT ||
        MI.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
        MI.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
      if (!isAlignedReg(AMDGPU::OpName::data0)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for DS_GWS instructions";

      if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
        ErrInfo = "Subtarget requires even aligned vector registers "
                  "for vaddr operand of image instructions";

  if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
    if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
      ErrInfo = "Invalid register class: "
                "v_accvgpr_write with an SGPR is not supported on this GPU";

  if (Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
      ErrInfo = "pseudo expects only physical SGPRs";
  switch (MI.getOpcode()) {
  default: return AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
  case AMDGPU::COPY: return AMDGPU::COPY;
  case AMDGPU::PHI: return AMDGPU::PHI;
  case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
  case AMDGPU::SOFT_WQM: return AMDGPU::SOFT_WQM;
  case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM;
  case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM;
  case AMDGPU::S_MOV_B32: {
    return MI.getOperand(1).isReg() ||
               AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
  case AMDGPU::S_ADD_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_ADDC_U32:
    return AMDGPU::V_ADDC_U32_e32;
  case AMDGPU::S_SUB_I32:
    return ST.hasAddNoCarry() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_ADD_U32:
    return AMDGPU::V_ADD_CO_U32_e32;
  case AMDGPU::S_SUB_U32:
    return AMDGPU::V_SUB_CO_U32_e32;
  case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
  case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_U32_e64;
  case AMDGPU::S_MUL_HI_U32: return AMDGPU::V_MUL_HI_U32_e64;
  case AMDGPU::S_MUL_HI_I32: return AMDGPU::V_MUL_HI_I32_e64;
  case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
  case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
  case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
  case AMDGPU::S_XNOR_B32:
    return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
  case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
  case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
  case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
  case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
  case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
  case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64_e64;
  case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
  case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64_e64;
  case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
  case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64_e64;
  case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32_e64;
  case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32_e64;
  case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64;
  case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
  case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
  case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
  case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
  case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
  case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
  case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
  case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
  case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
  case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
  case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
  case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
  case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
  case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
  case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
  case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
  case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
  case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
  case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
  case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64;
  case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ;
  case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ;

                   "Unexpected scalar opcode without corresponding vector one!");
                                          bool IsAllocatable) {
  case AMDGPU::AV_32RegClassID:
    RCID = AMDGPU::VGPR_32RegClassID;
  case AMDGPU::AV_64RegClassID:
    RCID = AMDGPU::VReg_64RegClassID;
  case AMDGPU::AV_96RegClassID:
    RCID = AMDGPU::VReg_96RegClassID;
  case AMDGPU::AV_128RegClassID:
    RCID = AMDGPU::VReg_128RegClassID;
  case AMDGPU::AV_160RegClassID:
    RCID = AMDGPU::VReg_160RegClassID;
  case AMDGPU::AV_512RegClassID:
    RCID = AMDGPU::VReg_512RegClassID;

  bool IsAllocatable = false;

                                             AMDGPU::OpName::vdst);
                                 : AMDGPU::OpName::vdata);
    if (DataIdx != -1) {
      IsAllocatable = VDstIdx != -1 ||
                          AMDGPU::OpName::data1) != -1;

                                                      unsigned OpNo) const {
  if (Reg.isVirtual())