#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "si-instr-info"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

#define GET_D16ImageDimIntrinsics_IMPL
#define GET_ImageDimIntrinsicTable_IMPL
#define GET_RsrcIntrinsics_IMPL
#include "AMDGPUGenSearchableTables.inc"
    cl::desc("Restrict range of branch instructions (DEBUG)"));

    "amdgpu-fix-16-bit-physreg-copies",
    cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"),

  unsigned N = Node->getNumOperands();
  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)

  if (Op0Idx == -1 && Op1Idx == -1)

  if ((Op0Idx == -1 && Op1Idx != -1) ||
      (Op1Idx == -1 && Op0Idx != -1))

  return !MI.memoperands_empty() &&
    return MMO->isLoad() && MMO->isInvariant();

  if (!MI.hasImplicitDef() &&
      MI.getNumImplicitOperands() == MI.getDesc().implicit_uses().size() &&
      !MI.mayRaiseFPException())

  if (MI.isCompare()) {
    switch (Use.getOpcode()) {
    case AMDGPU::S_AND_SAVEEXEC_B32:
    case AMDGPU::S_AND_SAVEEXEC_B64:
    case AMDGPU::S_AND_B32:
    case AMDGPU::S_AND_B64:
      if (!Use.readsRegister(AMDGPU::EXEC, nullptr))

  switch (MI.getOpcode()) {
  case AMDGPU::V_READFIRSTLANE_B32:

  if (MI.getOpcode() == AMDGPU::SI_IF_BREAK)
  for (auto Op : MI.uses()) {
    if (Op.isReg() && Op.getReg().isVirtual() &&
      if (FromCycle == nullptr)
      while (FromCycle && !FromCycle->contains(ToCycle)) {
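// Load-clustering support: the code below compares the base pointers and
// constant offsets of two loads so the scheduler can tell whether they are
// close enough to be scheduled or clustered together.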
                                            int64_t &Offset1) const {

  if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad())

  if (!get(Opc0).getNumDefs() || !get(Opc1).getNumDefs())

    if (Offset0Idx == -1 || Offset1Idx == -1)

    Offset0Idx -= get(Opc0).NumDefs;
    Offset1Idx -= get(Opc1).NumDefs;

    assert(NumOps == 4 || NumOps == 5);
        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
    if (!Load0Offset || !Load1Offset)

    if (OffIdx0 == -1 || OffIdx1 == -1)

    OffIdx0 -= get(Opc0).NumDefs;
    OffIdx1 -= get(Opc1).NumDefs;

    if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1))

  case AMDGPU::DS_READ2ST64_B32:
  case AMDGPU::DS_READ2ST64_B64:
  case AMDGPU::DS_WRITE2ST64_B32:
  case AMDGPU::DS_WRITE2ST64_B64:

  OffsetIsScalable = false;

      unsigned Offset0 = Offset0Op->getImm() & 0xff;
      unsigned Offset1 = Offset1Op->getImm() & 0xff;
      if (Offset0 + 1 != Offset1)

      Offset = EltSize * Offset0;

    if (DataOpIdx == -1) {

    if (BaseOp && !BaseOp->isFI())

    if (SOffset->isReg())

        isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
    if (VAddr0Idx >= 0) {
      for (int I = VAddr0Idx; I < SRsrcIdx; ++I)

    if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))

  if (MO1->getAddrSpace() != MO2->getAddrSpace())

  const auto *Base1 = MO1->getValue();
  const auto *Base2 = MO2->getValue();
  if (!Base1 || !Base2)

  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))

  return Base1 == Base2;

                                      int64_t Offset1, bool OffsetIsScalable1,
                                      int64_t Offset2, bool OffsetIsScalable2,
                                      unsigned ClusterSize,
                                      unsigned NumBytes) const {

  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
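  // Estimate the cluster footprint in dwords: each load is rounded up to a
  // whole dword and multiplied by the cluster size, then checked against the
  // per-cluster budget.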
  const unsigned LoadSize = NumBytes / ClusterSize;
  const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
  return NumDWords <= MaxMemoryClusterDWords;

                                          int64_t Offset0, int64_t Offset1,
                                          unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");

  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);

                              const char *Msg = "illegal VGPR to SGPR copy") {
  C.diagnose(IllegalCopy);

  assert((TII.getSubtarget().hasMAIInsts() &&
          !TII.getSubtarget().hasGFX90AInsts()) &&
         "Expected GFX908 subtarget.");

          AMDGPU::AGPR_32RegClass.contains(SrcReg)) &&
         "Source register of the copy should be either an SGPR or an AGPR.");
         "Destination register of the copy should be an AGPR.");
  for (auto Def = MI, E = MBB.begin(); Def != E; ) {
    if (!Def->modifiesRegister(SrcReg, &RI))

    if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
        Def->getOperand(0).getReg() != SrcReg)

    bool SafeToPropagate = true;
    for (auto I = Def; I != MI && SafeToPropagate; ++I)
      if (I->modifiesRegister(DefOp.getReg(), &RI))
        SafeToPropagate = false;

    if (!SafeToPropagate)

    if (ImpUseSuperReg) {
      Builder.addReg(ImpUseSuperReg,

  unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
         "VGPR used for an intermediate copy should have been reserved.");

  unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
  if (AMDGPU::AGPR_32RegClass.contains(SrcReg)) {
    TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;

  if (ImpUseSuperReg) {
    UseBuilder.addReg(ImpUseSuperReg,

    int16_t SubIdx = BaseIndices[Idx];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
    unsigned Opcode = AMDGPU::S_MOV_B32;

    bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
    bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
    if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
      DestSubReg = RI.getSubReg(DestReg, SubIdx);
      SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
      assert(DestSubReg && SrcSubReg && "Failed to find subregs!");
      Opcode = AMDGPU::S_MOV_B64;

  assert(FirstMI && LastMI);
  LastMI->addRegisterKilled(SrcReg, &RI);
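// copyPhysReg: choose a move opcode from the destination register class and,
// for register tuples, split the copy into 32-bit (or aligned 64-bit)
// sub-register moves.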
                              bool RenamableDest, bool RenamableSrc) const {
  unsigned Size = RI.getRegSizeInBits(*RC);
  unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (((Size == 16) != (SrcSize == 16))) {
    if (DestReg == SrcReg) {

    RC = RI.getPhysRegBaseClass(DestReg);
    Size = RI.getRegSizeInBits(*RC);
    SrcRC = RI.getPhysRegBaseClass(SrcReg);
    SrcSize = RI.getRegSizeInBits(*SrcRC);

  if (RC == &AMDGPU::VGPR_32RegClass) {
           AMDGPU::SReg_32RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_32RegClass.contains(SrcReg));
    unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
                     AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;

  if (RC == &AMDGPU::SReg_32_XM0RegClass ||
      RC == &AMDGPU::SReg_32RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC_LO) {
      if (AMDGPU::SReg_32RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_32RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::SReg_64RegClass) {
    if (SrcReg == AMDGPU::SCC) {

    if (DestReg == AMDGPU::VCC) {
      if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

    if (!AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (DestReg == AMDGPU::SCC) {
    if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {

  if (RC == &AMDGPU::AGPR_32RegClass) {
    if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
        (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {

    const bool Overlap = RI.regsOverlap(SrcReg, DestReg);

           AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
           AMDGPU::AGPR_LO16RegClass.contains(SrcReg));

    bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
    bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
    bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
    bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);

    if (IsAGPRDst || IsAGPRSrc) {
      if (!DstLow || !SrcLow) {
                          "Cannot use hi16 subreg with an AGPR!");

    if (AMDGPU::VGPR_16_Lo128RegClass.contains(DestReg) &&
        (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.contains(SrcReg))) {
      if (!DstLow || !SrcLow) {
                            "Cannot use hi16 subreg on VI!");

  const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
      Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
      Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
      Opcode = AMDGPU::INSTRUCTION_LIST_END;
      Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
      Opcode = AMDGPU::V_MOV_B64_e32;
      Opcode = AMDGPU::V_PK_MOV_B32;

  std::unique_ptr<RegScavenger> RS;
  if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
    RS = std::make_unique<RegScavenger>();

  const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
  const bool CanKillSuperReg = KillSrc && !Overlap;

      SubIdx = SubIndices[Idx];
      SubIdx = SubIndices[SubIndices.size() - Idx - 1];
    Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
    Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
    assert(DestSubReg && SrcSubReg && "Failed to find subregs!");

    bool IsFirstSubreg = Idx == 0;
    bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1;

    if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
                                     *RS, Overlap, ImpDefSuper, ImpUseSuper);
    } else if (Opcode == AMDGPU::V_PK_MOV_B32) {

                             int64_t Value) const {
  if (RegClass == &AMDGPU::SReg_32RegClass ||
      RegClass == &AMDGPU::SGPR_32RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0RegClass ||
      RegClass == &AMDGPU::SReg_32_XM0_XEXECRegClass) {

  if (RegClass == &AMDGPU::SReg_64RegClass ||
      RegClass == &AMDGPU::SGPR_64RegClass ||
      RegClass == &AMDGPU::SReg_64_XEXECRegClass) {

  if (RegClass == &AMDGPU::VGPR_32RegClass) {

  unsigned EltSize = 4;
  unsigned Opcode = AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*RegClass) > 32) {
      Opcode = AMDGPU::S_MOV_B64;
      Opcode = AMDGPU::S_MOV_B32;

    int64_t IdxValue = Idx == 0 ? Value : 0;
        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
    Builder.addImm(IdxValue);

  return &AMDGPU::VGPR_32RegClass;

  assert(MRI.getRegClass(DstReg) == &AMDGPU::VGPR_32RegClass &&
         "Not a VGPR32 reg");

  if (Cond.size() == 1) {
    Register SReg = MRI.createVirtualRegister(BoolXExecRC);
  } else if (Cond.size() == 2) {
    switch (Cond[0].getImm()) {
    case SIInstrInfo::SCC_TRUE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::SCC_FALSE: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::VCCNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::VCCZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
    case SIInstrInfo::EXECNZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                             : AMDGPU::S_CSELECT_B64), SReg)
    case SIInstrInfo::EXECZ: {
      Register SReg = MRI.createVirtualRegister(BoolXExecRC);
                              : AMDGPU::S_OR_SAVEEXEC_B64), SReg2)
                             : AMDGPU::S_CSELECT_B64), SReg)
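// Plain move selection: SALU moves for SGPR classes, VALU moves otherwise,
// with the 16/32/64-bit variant chosen by the destination register size.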
    return AMDGPU::COPY;
  if (RI.getRegSizeInBits(*DstRC) == 16) {
    return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
  if (RI.getRegSizeInBits(*DstRC) == 32)
    return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
  if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
    return AMDGPU::S_MOV_B64;
  if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
    return AMDGPU::V_MOV_B64_PSEUDO;
  return AMDGPU::COPY;

                                           bool IsIndirectSrc) const {
  if (IsIndirectSrc) {
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
    if (VecSize <= 1024)
      return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);

    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
  if (VecSize <= 1024)
    return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);

    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;

    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
  if (VecSize <= 1024)
    return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;

                                     bool IsSGPR) const {
    assert(EltSize == 32 && "invalid reg indexing elt size");
    return AMDGPU::SI_SPILL_S32_SAVE;
    return AMDGPU::SI_SPILL_S64_SAVE;
    return AMDGPU::SI_SPILL_S96_SAVE;
    return AMDGPU::SI_SPILL_S128_SAVE;
    return AMDGPU::SI_SPILL_S160_SAVE;
    return AMDGPU::SI_SPILL_S192_SAVE;
    return AMDGPU::SI_SPILL_S224_SAVE;
    return AMDGPU::SI_SPILL_S256_SAVE;
    return AMDGPU::SI_SPILL_S288_SAVE;
    return AMDGPU::SI_SPILL_S320_SAVE;
    return AMDGPU::SI_SPILL_S352_SAVE;
    return AMDGPU::SI_SPILL_S384_SAVE;
    return AMDGPU::SI_SPILL_S512_SAVE;
    return AMDGPU::SI_SPILL_S1024_SAVE;

    return AMDGPU::SI_SPILL_V32_SAVE;
    return AMDGPU::SI_SPILL_V64_SAVE;
    return AMDGPU::SI_SPILL_V96_SAVE;
    return AMDGPU::SI_SPILL_V128_SAVE;
    return AMDGPU::SI_SPILL_V160_SAVE;
    return AMDGPU::SI_SPILL_V192_SAVE;
    return AMDGPU::SI_SPILL_V224_SAVE;
    return AMDGPU::SI_SPILL_V256_SAVE;
    return AMDGPU::SI_SPILL_V288_SAVE;
    return AMDGPU::SI_SPILL_V320_SAVE;
    return AMDGPU::SI_SPILL_V352_SAVE;
    return AMDGPU::SI_SPILL_V384_SAVE;
    return AMDGPU::SI_SPILL_V512_SAVE;
    return AMDGPU::SI_SPILL_V1024_SAVE;

    return AMDGPU::SI_SPILL_A32_SAVE;
    return AMDGPU::SI_SPILL_A64_SAVE;
    return AMDGPU::SI_SPILL_A96_SAVE;
    return AMDGPU::SI_SPILL_A128_SAVE;
    return AMDGPU::SI_SPILL_A160_SAVE;
    return AMDGPU::SI_SPILL_A192_SAVE;
    return AMDGPU::SI_SPILL_A224_SAVE;
    return AMDGPU::SI_SPILL_A256_SAVE;
    return AMDGPU::SI_SPILL_A288_SAVE;
    return AMDGPU::SI_SPILL_A320_SAVE;
    return AMDGPU::SI_SPILL_A352_SAVE;
    return AMDGPU::SI_SPILL_A384_SAVE;
    return AMDGPU::SI_SPILL_A512_SAVE;
    return AMDGPU::SI_SPILL_A1024_SAVE;

    return AMDGPU::SI_SPILL_AV32_SAVE;
    return AMDGPU::SI_SPILL_AV64_SAVE;
    return AMDGPU::SI_SPILL_AV96_SAVE;
    return AMDGPU::SI_SPILL_AV128_SAVE;
    return AMDGPU::SI_SPILL_AV160_SAVE;
    return AMDGPU::SI_SPILL_AV192_SAVE;
    return AMDGPU::SI_SPILL_AV224_SAVE;
    return AMDGPU::SI_SPILL_AV256_SAVE;
    return AMDGPU::SI_SPILL_AV288_SAVE;
    return AMDGPU::SI_SPILL_AV320_SAVE;
    return AMDGPU::SI_SPILL_AV352_SAVE;
    return AMDGPU::SI_SPILL_AV384_SAVE;
    return AMDGPU::SI_SPILL_AV512_SAVE;
    return AMDGPU::SI_SPILL_AV1024_SAVE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_SAVE;

  return AMDGPU::SI_SPILL_WWM_V32_SAVE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
  if (IsVectorSuperClass)

      FrameInfo.getObjectAlign(FrameIndex));
  unsigned SpillSize = TRI->getSpillSize(*RC);

    assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
    assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
           SrcReg != AMDGPU::EXEC && "exec should not be spilled");

    if (SrcReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                             SpillSize, RI, *MFI);
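// Reload pseudo selection mirrors the save path: the spill size picks the
// matching SI_SPILL_*_RESTORE opcode.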
    return AMDGPU::SI_SPILL_S32_RESTORE;
    return AMDGPU::SI_SPILL_S64_RESTORE;
    return AMDGPU::SI_SPILL_S96_RESTORE;
    return AMDGPU::SI_SPILL_S128_RESTORE;
    return AMDGPU::SI_SPILL_S160_RESTORE;
    return AMDGPU::SI_SPILL_S192_RESTORE;
    return AMDGPU::SI_SPILL_S224_RESTORE;
    return AMDGPU::SI_SPILL_S256_RESTORE;
    return AMDGPU::SI_SPILL_S288_RESTORE;
    return AMDGPU::SI_SPILL_S320_RESTORE;
    return AMDGPU::SI_SPILL_S352_RESTORE;
    return AMDGPU::SI_SPILL_S384_RESTORE;
    return AMDGPU::SI_SPILL_S512_RESTORE;
    return AMDGPU::SI_SPILL_S1024_RESTORE;

    return AMDGPU::SI_SPILL_V32_RESTORE;
    return AMDGPU::SI_SPILL_V64_RESTORE;
    return AMDGPU::SI_SPILL_V96_RESTORE;
    return AMDGPU::SI_SPILL_V128_RESTORE;
    return AMDGPU::SI_SPILL_V160_RESTORE;
    return AMDGPU::SI_SPILL_V192_RESTORE;
    return AMDGPU::SI_SPILL_V224_RESTORE;
    return AMDGPU::SI_SPILL_V256_RESTORE;
    return AMDGPU::SI_SPILL_V288_RESTORE;
    return AMDGPU::SI_SPILL_V320_RESTORE;
    return AMDGPU::SI_SPILL_V352_RESTORE;
    return AMDGPU::SI_SPILL_V384_RESTORE;
    return AMDGPU::SI_SPILL_V512_RESTORE;
    return AMDGPU::SI_SPILL_V1024_RESTORE;

    return AMDGPU::SI_SPILL_A32_RESTORE;
    return AMDGPU::SI_SPILL_A64_RESTORE;
    return AMDGPU::SI_SPILL_A96_RESTORE;
    return AMDGPU::SI_SPILL_A128_RESTORE;
    return AMDGPU::SI_SPILL_A160_RESTORE;
    return AMDGPU::SI_SPILL_A192_RESTORE;
    return AMDGPU::SI_SPILL_A224_RESTORE;
    return AMDGPU::SI_SPILL_A256_RESTORE;
    return AMDGPU::SI_SPILL_A288_RESTORE;
    return AMDGPU::SI_SPILL_A320_RESTORE;
    return AMDGPU::SI_SPILL_A352_RESTORE;
    return AMDGPU::SI_SPILL_A384_RESTORE;
    return AMDGPU::SI_SPILL_A512_RESTORE;
    return AMDGPU::SI_SPILL_A1024_RESTORE;

    return AMDGPU::SI_SPILL_AV32_RESTORE;
    return AMDGPU::SI_SPILL_AV64_RESTORE;
    return AMDGPU::SI_SPILL_AV96_RESTORE;
    return AMDGPU::SI_SPILL_AV128_RESTORE;
    return AMDGPU::SI_SPILL_AV160_RESTORE;
    return AMDGPU::SI_SPILL_AV192_RESTORE;
    return AMDGPU::SI_SPILL_AV224_RESTORE;
    return AMDGPU::SI_SPILL_AV256_RESTORE;
    return AMDGPU::SI_SPILL_AV288_RESTORE;
    return AMDGPU::SI_SPILL_AV320_RESTORE;
    return AMDGPU::SI_SPILL_AV352_RESTORE;
    return AMDGPU::SI_SPILL_AV384_RESTORE;
    return AMDGPU::SI_SPILL_AV512_RESTORE;
    return AMDGPU::SI_SPILL_AV1024_RESTORE;

                                           bool IsVectorSuperClass) {
  if (IsVectorSuperClass)
    return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;

  return AMDGPU::SI_SPILL_WWM_V32_RESTORE;

  bool IsVectorSuperClass = TRI.isVectorSuperClass(RC);
  if (IsVectorSuperClass)

  unsigned SpillSize = TRI->getSpillSize(*RC);
      FrameInfo.getObjectAlign(FrameIndex));

    assert(DestReg != AMDGPU::M0 && "m0 should not be reloaded into");
    assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
           DestReg != AMDGPU::EXEC && "exec should not be spilled");

    if (DestReg.isVirtual() && SpillSize == 4) {
      MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);

                             SpillSize, RI, *MFI);

                             unsigned Quantity) const {
  while (Quantity > 0) {
    unsigned Arg = std::min(Quantity, 8u);

  if (HasNoTerminator) {
    if (Info->returnsVoid()) {

  constexpr unsigned DoorbellIDMask = 0x3ff;
  constexpr unsigned ECQueueWaveAbort = 0x400;
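  // Queue-abort path of the simulated trap: mask the doorbell ID with
  // DoorbellIDMask, OR in ECQueueWaveAbort, and hand the result to the trap
  // handler through m0 (m0 appears to be saved to ttmp2 and restored after).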
  Register DoorbellReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_AND_B32), DoorbellRegMasked)
      MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_OR_B32), SetWaveAbortBit)
      .addUse(DoorbellRegMasked)
      .addImm(ECQueueWaveAbort);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)
      .addUse(SetWaveAbortBit);
  BuildMI(*TrapBB, TrapBB->end(), DL, get(AMDGPU::S_MOV_B32), AMDGPU::M0)

  switch (MI.getOpcode()) {
    if (MI.isMetaInstruction())
    return MI.getOperand(0).getImm() + 1;

  switch (MI.getOpcode()) {
  case AMDGPU::S_MOV_B64_term:
    MI.setDesc(get(AMDGPU::S_MOV_B64));
  case AMDGPU::S_MOV_B32_term:
    MI.setDesc(get(AMDGPU::S_MOV_B32));
  case AMDGPU::S_XOR_B64_term:
    MI.setDesc(get(AMDGPU::S_XOR_B64));
  case AMDGPU::S_XOR_B32_term:
    MI.setDesc(get(AMDGPU::S_XOR_B32));
  case AMDGPU::S_OR_B64_term:
    MI.setDesc(get(AMDGPU::S_OR_B64));
  case AMDGPU::S_OR_B32_term:
    MI.setDesc(get(AMDGPU::S_OR_B32));
  case AMDGPU::S_ANDN2_B64_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B64));
  case AMDGPU::S_ANDN2_B32_term:
    MI.setDesc(get(AMDGPU::S_ANDN2_B32));
  case AMDGPU::S_AND_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_B64));
  case AMDGPU::S_AND_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_B32));
  case AMDGPU::S_AND_SAVEEXEC_B64_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
  case AMDGPU::S_AND_SAVEEXEC_B32_term:
    MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));

  case AMDGPU::SI_SPILL_S32_TO_VGPR:
    MI.setDesc(get(AMDGPU::V_WRITELANE_B32));

  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
    MI.setDesc(get(AMDGPU::V_READLANE_B32));
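  // 64-bit move pseudos: use a real 64-bit VALU move when the subtarget has
  // one, otherwise split the value into 32-bit halves and emit two moves.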
  case AMDGPU::V_MOV_B64_PSEUDO: {
    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));

    if (SrcOp.isImm()) {
      APInt Lo(32, Imm.getLoBits(32).getZExtValue());
      APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_MOV_B64_DPP_PSEUDO: {

  case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
      MI.setDesc(get(AMDGPU::S_MOV_B64));

    Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
    Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
    APInt Lo(32, Imm.getLoBits(32).getZExtValue());
    APInt Hi(32, Imm.getHiBits(32).getZExtValue());

    MI.eraseFromParent();

  case AMDGPU::V_SET_INACTIVE_B32: {
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(5));
    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
  case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
      Opc = AMDGPU::V_MOVRELD_B32_e32;
      Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
                                              : AMDGPU::S_MOVRELD_B32;

    bool IsUndef = MI.getOperand(1).isUndef();
    unsigned SubReg = MI.getOperand(3).getImm();
    assert(VecReg == MI.getOperand(1).getReg());

            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();
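  // Indirect writes/reads using the gpr_idx addressing mode; the write case
  // goes through V_MOV_B32_indirect_write with the whole vector tied as an
  // implicit def/use.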
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
            .add(MI.getOperand(2))

    const int ImpDefIdx =
    const int ImpUseIdx = ImpDefIdx + 1;

    MI.eraseFromParent();

  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
  case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
    bool IsUndef = MI.getOperand(1).isUndef();

    MI.eraseFromParent();

  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
    Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
    Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);

          BuildMI(MF, DL, get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
        BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));

    MI.eraseFromParent();

  case AMDGPU::ENTER_STRICT_WWM: {
                                 : AMDGPU::S_OR_SAVEEXEC_B64));

  case AMDGPU::ENTER_STRICT_WQM: {
    const unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    const unsigned WQMOp = ST.isWave32() ? AMDGPU::S_WQM_B32 : AMDGPU::S_WQM_B64;
    const unsigned MovOp = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;

    MI.eraseFromParent();

  case AMDGPU::EXIT_STRICT_WWM:
  case AMDGPU::EXIT_STRICT_WQM: {
    MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));

  case AMDGPU::SI_RETURN: {
    MI.eraseFromParent();

  case AMDGPU::S_MUL_U64_U32_PSEUDO:
  case AMDGPU::S_MUL_I64_I32_PSEUDO:
    MI.setDesc(get(AMDGPU::S_MUL_U64));

  case AMDGPU::S_GETPC_B64_pseudo:
    MI.setDesc(get(AMDGPU::S_GETPC_B64));
      Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);

  case AMDGPU::S_LOAD_DWORDX16_IMM:
  case AMDGPU::S_LOAD_DWORDX8_IMM: {
      for (auto &CandMO : I->operands()) {
        if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())

    if (!UseMO || UseMO->getSubReg() == AMDGPU::NoSubRegister)

    assert(MRI.use_nodbg_empty(DestReg) && "DestReg should have no users yet.");

    unsigned NewOpcode = -1;
    if (SubregSize == 256)
      NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
    else if (SubregSize == 128)
      NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;

    MRI.setRegClass(DestReg, NewRC);
    UseMO->setSubReg(AMDGPU::NoSubRegister);

    MI->getOperand(0).setReg(DestReg);
    MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);

    OffsetMO->setImm(FinalOffset);

    MI->setMemRefs(*MF, NewMMOs);

std::pair<MachineInstr*, MachineInstr*>
  assert(MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

    MI.setDesc(get(AMDGPU::V_MOV_B64_dpp));
    return std::pair(&MI, nullptr);

  for (auto Sub : {AMDGPU::sub0, AMDGPU::sub1}) {
    if (Dst.isPhysical()) {
      MovDPP.addDef(RI.getSubReg(Dst, Sub));
      auto Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

    for (unsigned I = 1; I <= 2; ++I) {
      if (SrcOp.isImm()) {
        Imm.ashrInPlace(Part * 32);
        MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
        if (Src.isPhysical())
          MovDPP.addReg(RI.getSubReg(Src, Sub));
      MovDPP.addImm(MO.getImm());

    Split[Part] = MovDPP;

  if (Dst.isVirtual())

  MI.eraseFromParent();
  return std::pair(Split[0], Split[1]);

std::optional<DestSourcePair>
  if (MI.getOpcode() == AMDGPU::WWM_COPY)

  return std::nullopt;

                                        unsigned Src0OpName,
                                        unsigned Src1OpName) const {
         "All commutable instructions have both src0 and src1 modifiers");

  int Src0ModsVal = Src0Mods->getImm();
  int Src1ModsVal = Src1Mods->getImm();

  Src1Mods->setImm(Src0ModsVal);
  Src0Mods->setImm(Src1ModsVal);

  bool IsKill = RegOp.isKill();
  bool IsUndef = RegOp.isUndef();
  bool IsDebug = RegOp.isDebug();

  if (NonRegOp.isImm())
  else if (NonRegOp.isFI())
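// commuteInstructionImpl: swap src0/src1 together with their modifier
// operands using the helpers above; if one source is an immediate or frame
// index, the register operand is rewritten in place instead.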
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");
  unsigned Opc = MI.getOpcode();
  if (CommutedOpcode == -1)

  if (Src0Idx > Src1Idx)

             static_cast<int>(Src0Idx) &&
             static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

                               Src1, AMDGPU::OpName::src1_modifiers);
                       AMDGPU::OpName::src1_sel);

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {

                                        unsigned &SrcOpIdx0,
                                        unsigned &SrcOpIdx1) const {
  if (!Desc.isCommutable())

  unsigned Opc = Desc.getOpcode();

  return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);

                                        int64_t BrOffset) const {
  assert(BranchOp != AMDGPU::S_SETPC_B64);

  return MI.getOperand(0).getMBB();

  if (MI.getOpcode() == AMDGPU::SI_IF || MI.getOpcode() == AMDGPU::SI_ELSE ||
      MI.getOpcode() == AMDGPU::SI_LOOP)

  assert(RS && "RegScavenger required for long branching");
         "new block should be inserted for expanding unconditional branch");
         "restore block should be inserted for restoring clobbered registers");
  Register PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);

  auto ApplyHazardWorkarounds = [this, &MBB, &I, &DL, FlushSGPRWrites]() {
    if (FlushSGPRWrites)

  ApplyHazardWorkarounds();

      MCCtx.createTempSymbol("post_getpc", true);
      MCCtx.createTempSymbol("offset_lo", true);
      MCCtx.createTempSymbol("offset_hi", true);

      .addReg(PCReg, 0, AMDGPU::sub0)
      .addReg(PCReg, 0, AMDGPU::sub1)
  ApplyHazardWorkarounds();

  if (LongBranchReservedReg) {
    Scav = LongBranchReservedReg;

    MRI.replaceRegWith(PCReg, Scav);
    MRI.clearVirtRegs();

    TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
    MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
    MRI.clearVirtRegs();

unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
  case SIInstrInfo::SCC_TRUE:
    return AMDGPU::S_CBRANCH_SCC1;
  case SIInstrInfo::SCC_FALSE:
    return AMDGPU::S_CBRANCH_SCC0;
  case SIInstrInfo::VCCNZ:
    return AMDGPU::S_CBRANCH_VCCNZ;
  case SIInstrInfo::VCCZ:
    return AMDGPU::S_CBRANCH_VCCZ;
  case SIInstrInfo::EXECNZ:
    return AMDGPU::S_CBRANCH_EXECNZ;
  case SIInstrInfo::EXECZ:
    return AMDGPU::S_CBRANCH_EXECZ;

SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
  case AMDGPU::S_CBRANCH_SCC0:
  case AMDGPU::S_CBRANCH_SCC1:
  case AMDGPU::S_CBRANCH_VCCNZ:
  case AMDGPU::S_CBRANCH_VCCZ:
  case AMDGPU::S_CBRANCH_EXECNZ:
  case AMDGPU::S_CBRANCH_EXECZ:

                                bool AllowModify) const {
  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    TBB = I->getOperand(0).getMBB();

  BranchPredicate Pred = getBranchPredicate(I->getOpcode());
  if (Pred == INVALID_BR)

  Cond.push_back(I->getOperand(1));

  if (I->getOpcode() == AMDGPU::S_BRANCH) {
    FBB = I->getOperand(0).getMBB();

                                bool AllowModify) const {
  while (I != E && !I->isBranch() && !I->isReturn()) {
    switch (I->getOpcode()) {
    case AMDGPU::S_MOV_B64_term:
    case AMDGPU::S_XOR_B64_term:
    case AMDGPU::S_OR_B64_term:
    case AMDGPU::S_ANDN2_B64_term:
    case AMDGPU::S_AND_B64_term:
    case AMDGPU::S_AND_SAVEEXEC_B64_term:
    case AMDGPU::S_MOV_B32_term:
    case AMDGPU::S_XOR_B32_term:
    case AMDGPU::S_OR_B32_term:
    case AMDGPU::S_ANDN2_B32_term:
    case AMDGPU::S_AND_B32_term:
    case AMDGPU::S_AND_SAVEEXEC_B32_term:
    case AMDGPU::SI_ELSE:
    case AMDGPU::SI_KILL_I1_TERMINATOR:
    case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:

                                   int *BytesRemoved) const {
  unsigned RemovedSize = 0;
    if (MI.isBranch() || MI.isReturn()) {
      MI.eraseFromParent();

    *BytesRemoved = RemovedSize;

                                   int *BytesAdded) const {
  if (!FBB && Cond.empty()) {

      = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));

  if (Cond.size() != 2) {

                                   Register FalseReg, int &CondCycles,
                                   int &TrueCycles, int &FalseCycles) const {
  switch (Cond[0].getImm()) {
    if (MRI.getRegClass(FalseReg) != RC)

    CondCycles = TrueCycles = FalseCycles = NumInsts;
    return RI.hasVGPRs(RC) && NumInsts <= 6;

    if (MRI.getRegClass(FalseReg) != RC)

    if (NumInsts % 2 == 0)

    CondCycles = TrueCycles = FalseCycles = NumInsts;

  BranchPredicate Pred = static_cast<BranchPredicate>(Cond[0].getImm());
  if (Pred == VCCZ || Pred == SCC_FALSE) {
    Pred = static_cast<BranchPredicate>(-Pred);

  unsigned DstSize = RI.getRegSizeInBits(*DstRC);

  if (DstSize == 32) {
    if (Pred == SCC_TRUE) {

  if (DstSize == 64 && Pred == SCC_TRUE) {

  static const int16_t Sub0_15[] = {
    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,

  static const int16_t Sub0_15_64[] = {
    AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
    AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
    AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
    AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,

  unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
  const int16_t *SubIndices = Sub0_15;
  int NElts = DstSize / 32;

  if (Pred == SCC_TRUE) {
      SelOp = AMDGPU::S_CSELECT_B32;
      EltRC = &AMDGPU::SGPR_32RegClass;
      SelOp = AMDGPU::S_CSELECT_B64;
      EltRC = &AMDGPU::SGPR_64RegClass;
      SubIndices = Sub0_15_64;

      MBB, I, DL, get(AMDGPU::REG_SEQUENCE), DstReg);

  for (int Idx = 0; Idx != NElts; ++Idx) {
    Register DstElt = MRI.createVirtualRegister(EltRC);

    unsigned SubIdx = SubIndices[Idx];

    if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
        .addReg(FalseReg, 0, SubIdx)
        .addReg(TrueReg, 0, SubIdx);
        .addReg(TrueReg, 0, SubIdx)
        .addReg(FalseReg, 0, SubIdx);
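  // Opcodes treated as simple moves when folding immediates through copies.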
  switch (MI.getOpcode()) {
  case AMDGPU::V_MOV_B16_t16_e32:
  case AMDGPU::V_MOV_B16_t16_e64:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::V_MOV_B64_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::WWM_COPY:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
  case AMDGPU::V_ACCVGPR_READ_B32_e64:
  case AMDGPU::V_ACCVGPR_MOV_B32:

      AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
      AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
      AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};

  unsigned Opc = MI.getOpcode();
      MI.removeOperand(Idx);

  if (!MRI->hasOneNonDBGUse(Reg))

  switch (DefMI.getOpcode()) {
  case AMDGPU::V_MOV_B64_e32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B64_IMM_PSEUDO:
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::V_ACCVGPR_WRITE_B32_e64:

  if (!ImmOp->isImm())

  auto getImmFor = [ImmOp](const MachineOperand &UseOp) -> int64_t {
    int64_t Imm = ImmOp->getImm();
    switch (UseOp.getSubReg()) {
      return SignExtend64<16>(Imm);
      return SignExtend64<16>(Imm >> 16);
    case AMDGPU::sub1_lo16:
      return SignExtend64<16>(Imm >> 32);
    case AMDGPU::sub1_hi16:
      return SignExtend64<16>(Imm >> 48);

  assert(!DefMI.getOperand(0).getSubReg() && "Expected SSA form");

  unsigned Opc = UseMI.getOpcode();
  if (Opc == AMDGPU::COPY) {
    assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");

    bool Is16Bit = OpSize == 2;
    bool Is64Bit = OpSize == 8;
    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
                                           : AMDGPU::V_MOV_B32_e32
                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
                                           : AMDGPU::S_MOV_B32;
    APInt Imm(Is64Bit ? 64 : 32, getImmFor(UseMI.getOperand(1)),

      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)

      UseMI.getOperand(0).setSubReg(0);
        UseMI.getOperand(0).setReg(DstReg);

    UseMI.setDesc(NewMCID);
    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());

  if (Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {

    bool IsF32 = Opc == AMDGPU::V_MAD_F32_e64 || Opc == AMDGPU::V_MAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64;
                 Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
                 Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                 Opc == AMDGPU::V_FMAC_F16_fake16_e64;

        Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
    if (!RegSrc->isReg())

      if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
                         : AMDGPU::V_FMAMK_F16)
                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);

      if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)

      const int64_t Imm = getImmFor(RegSrc == Src1 ? *Src0 : *Src1);

      unsigned SrcSubReg = RegSrc->getSubReg();

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      Src1->ChangeToImmediate(Imm);

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

      bool Src0Inlined = false;
      if (Src0->isReg()) {
        if (Def && Def->isMoveImmediate() &&

      if (Src1->isReg() && !Src0Inlined) {
        if (Def && Def->isMoveImmediate() &&

          IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
                         : AMDGPU::V_FMAAK_F16)
                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);

      if (NewOpc == AMDGPU::V_FMAAK_F16_fake16)

      if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
          Opc == AMDGPU::V_FMAC_F32_e64 ||
          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
        UseMI.untieRegOperand(

      bool DeleteDef = MRI->use_nodbg_empty(Reg);
        DefMI.eraseFromParent();

  if (BaseOps1.size() != BaseOps2.size())
  for (size_t I = 0, E = BaseOps1.size(); I < E; ++I) {
    if (!BaseOps1[I]->isIdenticalTo(*BaseOps2[I]))

  int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
         LowOffset + (int)LowWidth.getValue() <= HighOffset;

bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
  int64_t Offset0, Offset1;
  bool Offset0IsScalable, Offset1IsScalable;

         "MIa must load from or modify a memory location");
         "MIb must load from or modify a memory location");

      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);
      return checkInstOffsetsDoNotOverlap(MIa, MIb);

  if (Reg.isPhysical())
  auto *Def = MRI.getUniqueVRegDef(Reg);
    Imm = Def->getOperand(1).getImm();

  unsigned NumOps = MI.getNumOperands();
  for (unsigned I = 1; I < NumOps; ++I) {
    if (Op.isReg() && Op.isKill())

  unsigned Opc = MI.getOpcode();
  if (NewMFMAOpc != -1) {
    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
      MIB.add(MI.getOperand(I));

      if (Def.isEarlyClobber() && Def.isReg() &&
        auto UpdateDefIndex = [&](LiveRange &LR) {
          auto *S = LR.find(OldIndex);
          if (S != LR.end() && S->start == OldIndex) {
            assert(S->valno && S->valno->def == OldIndex);
            S->start = NewIndex;
            S->valno->def = NewIndex;
        for (auto &SR : LI.subranges())

  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)

         Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
         "V_FMAC_F16_fake16_e32 is not supported and not expected to be present "

  bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64;
  bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
               Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
  bool IsLegacy = Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
                  Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
  bool Src0Literal = false;

  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_MAC_F16_e32:
  case AMDGPU::V_FMAC_F16_e32:
  case AMDGPU::V_MAC_F32_e32:
  case AMDGPU::V_MAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F32_e32:
  case AMDGPU::V_FMAC_LEGACY_F32_e32:
  case AMDGPU::V_FMAC_F64_e32: {
                                             AMDGPU::OpName::src0);

  if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&

    const auto killDef = [&]() -> void {
      if (MRI.hasOneNonDBGUse(DefReg)) {

      Register DummyReg = MRI.cloneVirtualRegister(DefReg);
        if (MIOp.isReg() && MIOp.getReg() == DefReg) {
          MIOp.setIsUndef(true);
          MIOp.setReg(DummyReg);

                           : AMDGPU::V_FMAAK_F16)
                  : AMDGPU::V_FMAAK_F32)
          : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);

                           : AMDGPU::V_FMAMK_F16)
                  : AMDGPU::V_FMAMK_F32)
          : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);

  unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
                          : IsF64 ? AMDGPU::V_FMA_F64_e64
                                  ? AMDGPU::V_FMA_LEGACY_F32_e64
                                  : AMDGPU::V_FMA_F32_e64
                          : IsF16 ? AMDGPU::V_MAD_F16_e64
                          : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
                                     : AMDGPU::V_MAD_F32_e64;

    MIB.addImm(OpSel ? OpSel->getImm() : 0);

  switch (MI.getOpcode()) {
  case AMDGPU::S_SET_GPR_IDX_ON:
  case AMDGPU::S_SET_GPR_IDX_MODE:
  case AMDGPU::S_SET_GPR_IDX_OFF:

  if (MI.isTerminator() || MI.isPosition())

  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)

  if (MI.getOpcode() == AMDGPU::SCHED_BARRIER && MI.getOperand(0).getImm() == 0)

  return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
         MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
         MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
         MI.getOpcode() == AMDGPU::S_SETPRIO ||

  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);

  unsigned Opcode = MI.getOpcode();

  if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
      isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
      Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)

  if (MI.isCall() || MI.isInlineAsm())

  if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
      Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
      Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
      Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)

  if (MI.isMetaInstruction())

  if (MI.isCopyLike()) {
    return MI.readsRegister(AMDGPU::EXEC, &RI);

  return !isSALU(MI) || MI.readsRegister(AMDGPU::EXEC, &RI);

  switch (Imm.getBitWidth()) {

  APInt IntImm = Imm.bitcastToAPInt();

  assert(!MO.isReg() && "isInlineConstant called on register operand!");

  int64_t Imm = MO.getImm();
  switch (OperandType) {
    int32_t Trunc = static_cast<int32_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

    if (isInt<16>(Imm) || isUInt<16>(Imm)) {
      int16_t Trunc = static_cast<int16_t>(Imm);

                             AMDGPU::OpName::src2))

  if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())

  return Mods && Mods->getImm();
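  // Shrinking to a 32-bit VOP encoding: opcodes with a src2 operand are
  // rejected by default; carry-using adds/subs, MAC/FMAC forms and
  // V_CNDMASK_B32 are the exceptions, subject to further operand checks.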
  switch (MI.getOpcode()) {
  default: return false;
  case AMDGPU::V_ADDC_U32_e64:
  case AMDGPU::V_SUBB_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e64: {
  case AMDGPU::V_MAC_F16_e64:
  case AMDGPU::V_MAC_F32_e64:
  case AMDGPU::V_MAC_LEGACY_F32_e64:
  case AMDGPU::V_FMAC_F16_e64:
  case AMDGPU::V_FMAC_F16_fake16_e64:
  case AMDGPU::V_FMAC_F32_e64:
  case AMDGPU::V_FMAC_F64_e64:
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
  case AMDGPU::V_CNDMASK_B32_e64:

        (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) {

                                           unsigned Op32) const {
      Inst32.add(MI.getOperand(I));

  int Idx = MI.getNumExplicitDefs();
    int OpTy = MI.getDesc().operands()[Idx++].OperandType;

    if (MO.getReg() == AMDGPU::SGPR_NULL || MO.getReg() == AMDGPU::SGPR_NULL64)

      return MO.getReg() == AMDGPU::M0 || MO.getReg() == AMDGPU::VCC ||
             MO.getReg() == AMDGPU::VCC_LO;

    return AMDGPU::SReg_32RegClass.contains(MO.getReg()) ||
           AMDGPU::SReg_64RegClass.contains(MO.getReg());

    switch (MO.getReg()) {
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::FLAT_SCR:

  switch (MI.getOpcode()) {
  case AMDGPU::V_READLANE_B32:
  case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
  case AMDGPU::V_WRITELANE_B32:
  case AMDGPU::SI_SPILL_S32_TO_VGPR:

  if (MI.isPreISelOpcode() ||
      SIInstrInfo::isGenericOpcode(MI.getOpcode()) ||

    if (SubReg.getReg().isPhysical())

    return SubReg.getSubReg() != AMDGPU::NoSubRegister &&

      ErrInfo = "illegal copy from vector register to SGPR";