33#include "llvm/IR/IntrinsicsAMDGPU.h"
40#define DEBUG_TYPE "si-instr-info"
42#define GET_INSTRINFO_CTOR_DTOR
43#include "AMDGPUGenInstrInfo.inc"
46#define GET_D16ImageDimIntrinsics_IMPL
47#define GET_ImageDimIntrinsicTable_IMPL
48#define GET_RsrcIntrinsics_IMPL
49#include "AMDGPUGenSearchableTables.inc"
57 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
60 "amdgpu-fix-16-bit-physreg-copies",
61 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
77 unsigned N =
Node->getNumOperands();
78 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
90 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
91 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
93 if (Op0Idx == -1 && Op1Idx == -1)
97 if ((Op0Idx == -1 && Op1Idx != -1) ||
98 (Op1Idx == -1 && Op0Idx != -1))
119 return !
MI.memoperands_empty() &&
121 return MMO->isLoad() && MMO->isInvariant();
143 if (!
MI.hasImplicitDef() &&
144 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
145 !
MI.mayRaiseFPException())
153bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
156 if (
MI.isCompare()) {
167 switch (
Use.getOpcode()) {
168 case AMDGPU::S_AND_SAVEEXEC_B32:
169 case AMDGPU::S_AND_SAVEEXEC_B64:
171 case AMDGPU::S_AND_B32:
172 case AMDGPU::S_AND_B64:
173 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
184 if (!
MI.isConvergent())
187 switch (
MI.getOpcode()) {
190 case AMDGPU::V_READFIRSTLANE_B32:
207 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
212 for (
auto Op :
MI.uses()) {
213 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
219 if (FromCycle ==
nullptr)
225 while (FromCycle && !FromCycle->
contains(ToCycle)) {
245 int64_t &Offset1)
const {
253 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
257 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
273 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
274 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
275 if (Offset0Idx == -1 || Offset1Idx == -1)
282 Offset0Idx -=
get(Opc0).NumDefs;
283 Offset1Idx -=
get(Opc1).NumDefs;
313 if (!Load0Offset || !Load1Offset)
330 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
331 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
333 if (OffIdx0 == -1 || OffIdx1 == -1)
339 OffIdx0 -=
get(Opc0).NumDefs;
340 OffIdx1 -=
get(Opc1).NumDefs;
359 case AMDGPU::DS_READ2ST64_B32:
360 case AMDGPU::DS_READ2ST64_B64:
361 case AMDGPU::DS_WRITE2ST64_B32:
362 case AMDGPU::DS_WRITE2ST64_B64:
377 OffsetIsScalable =
false;
394 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
396 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
397 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
410 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
411 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
412 if (Offset0 + 1 != Offset1)
423 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
431 Offset = EltSize * Offset0;
433 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
434 if (DataOpIdx == -1) {
435 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
437 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
453 if (BaseOp && !BaseOp->
isFI())
461 if (SOffset->
isReg())
467 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
469 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
478 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
479 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
481 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
482 if (VAddr0Idx >= 0) {
484 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
491 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
506 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
523 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
525 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
542 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
550 if (MO1->getAddrSpace() != MO2->getAddrSpace())
553 const auto *Base1 = MO1->getValue();
554 const auto *Base2 = MO2->getValue();
555 if (!Base1 || !Base2)
563 return Base1 == Base2;
567 int64_t Offset1,
bool OffsetIsScalable1,
569 int64_t Offset2,
bool OffsetIsScalable2,
570 unsigned ClusterSize,
571 unsigned NumBytes)
const {
584 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
603 const unsigned LoadSize = NumBytes / ClusterSize;
604 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
605 return NumDWords <= MaxMemoryClusterDWords;
619 int64_t Offset0, int64_t Offset1,
620 unsigned NumLoads)
const {
621 assert(Offset1 > Offset0 &&
622 "Second offset should be larger than first offset!");
627 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
634 const char *Msg =
"illegal VGPR to SGPR copy") {
655 assert((
TII.getSubtarget().hasMAIInsts() &&
656 !
TII.getSubtarget().hasGFX90AInsts()) &&
657 "Expected GFX908 subtarget.");
660 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
661 "Source register of the copy should be either an SGPR or an AGPR.");
664 "Destination register of the copy should be an AGPR.");
673 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
676 if (!Def->modifiesRegister(SrcReg, &RI))
679 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
680 Def->getOperand(0).getReg() != SrcReg)
687 bool SafeToPropagate =
true;
690 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
691 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
692 SafeToPropagate =
false;
694 if (!SafeToPropagate)
697 for (
auto I = Def;
I !=
MI; ++
I)
698 I->clearRegisterKills(DefOp.
getReg(), &RI);
707 if (ImpUseSuperReg) {
708 Builder.addReg(ImpUseSuperReg,
716 RS.enterBasicBlockEnd(
MBB);
717 RS.backward(std::next(
MI));
726 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
729 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
730 "VGPR used for an intermediate copy should have been reserved.");
735 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
745 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
746 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
747 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
754 if (ImpUseSuperReg) {
755 UseBuilder.
addReg(ImpUseSuperReg,
776 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
777 int16_t SubIdx = BaseIndices[Idx];
778 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
779 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
780 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
781 unsigned Opcode = AMDGPU::S_MOV_B32;
784 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
785 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
786 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
790 DestSubReg = RI.getSubReg(DestReg, SubIdx);
791 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
792 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
793 Opcode = AMDGPU::S_MOV_B64;
808 assert(FirstMI && LastMI);
816 LastMI->addRegisterKilled(SrcReg, &RI);
822 Register SrcReg,
bool KillSrc,
bool RenamableDest,
823 bool RenamableSrc)
const {
825 unsigned Size = RI.getRegSizeInBits(*RC);
827 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
833 if (((
Size == 16) != (SrcSize == 16))) {
835 assert(ST.useRealTrue16Insts());
837 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
840 if (DestReg == SrcReg) {
846 RC = RI.getPhysRegBaseClass(DestReg);
847 Size = RI.getRegSizeInBits(*RC);
848 SrcRC = RI.getPhysRegBaseClass(SrcReg);
849 SrcSize = RI.getRegSizeInBits(*SrcRC);
853 if (RC == &AMDGPU::VGPR_32RegClass) {
855 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
856 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
857 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
858 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
864 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
865 RC == &AMDGPU::SReg_32RegClass) {
866 if (SrcReg == AMDGPU::SCC) {
873 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
874 if (DestReg == AMDGPU::VCC_LO) {
892 if (RC == &AMDGPU::SReg_64RegClass) {
893 if (SrcReg == AMDGPU::SCC) {
900 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
901 if (DestReg == AMDGPU::VCC) {
919 if (DestReg == AMDGPU::SCC) {
922 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
926 assert(ST.hasScalarCompareEq64());
940 if (RC == &AMDGPU::AGPR_32RegClass) {
941 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
942 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
948 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
957 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
964 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
965 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
967 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
968 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
969 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
970 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
973 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
974 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
987 if (IsAGPRDst || IsAGPRSrc) {
988 if (!DstLow || !SrcLow) {
990 "Cannot use hi16 subreg with an AGPR!");
997 if (ST.useRealTrue16Insts()) {
1003 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
1004 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1016 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
1017 if (!DstLow || !SrcLow) {
1019 "Cannot use hi16 subreg on VI!");
1042 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1043 if (ST.hasMovB64()) {
1048 if (ST.hasPkMovB32()) {
1064 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1065 if (RI.isSGPRClass(RC)) {
1066 if (!RI.isSGPRClass(SrcRC)) {
1070 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1076 unsigned EltSize = 4;
1077 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1078 if (RI.isAGPRClass(RC)) {
1079 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1080 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1081 else if (RI.hasVGPRs(SrcRC) ||
1082 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1083 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1085 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1086 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1087 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1088 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1089 (RI.isProperlyAlignedRC(*RC) &&
1090 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1092 if (ST.hasMovB64()) {
1093 Opcode = AMDGPU::V_MOV_B64_e32;
1095 }
else if (ST.hasPkMovB32()) {
1096 Opcode = AMDGPU::V_PK_MOV_B32;
1106 std::unique_ptr<RegScavenger> RS;
1107 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1108 RS = std::make_unique<RegScavenger>();
1114 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1115 const bool CanKillSuperReg = KillSrc && !Overlap;
1117 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1120 SubIdx = SubIndices[Idx];
1122 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1123 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1124 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1125 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1127 bool IsFirstSubreg = Idx == 0;
1128 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1130 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1134 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1135 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1181 return &AMDGPU::VGPR_32RegClass;
1194 "Not a VGPR32 reg");
1196 if (
Cond.size() == 1) {
1206 }
else if (
Cond.size() == 2) {
1207 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1209 case SIInstrInfo::SCC_TRUE: {
1220 case SIInstrInfo::SCC_FALSE: {
1231 case SIInstrInfo::VCCNZ: {
1245 case SIInstrInfo::VCCZ: {
1259 case SIInstrInfo::EXECNZ: {
1272 case SIInstrInfo::EXECZ: {
1322 int64_t &ImmVal)
const {
1323 switch (
MI.getOpcode()) {
1324 case AMDGPU::V_MOV_B32_e32:
1325 case AMDGPU::S_MOV_B32:
1326 case AMDGPU::S_MOVK_I32:
1327 case AMDGPU::S_MOV_B64:
1328 case AMDGPU::V_MOV_B64_e32:
1329 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1330 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1331 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1332 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1333 case AMDGPU::V_MOV_B64_PSEUDO:
1334 case AMDGPU::V_MOV_B16_t16_e32: {
1338 return MI.getOperand(0).getReg() == Reg;
1343 case AMDGPU::V_MOV_B16_t16_e64: {
1345 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1347 return MI.getOperand(0).getReg() == Reg;
1352 case AMDGPU::S_BREV_B32:
1353 case AMDGPU::V_BFREV_B32_e32:
1354 case AMDGPU::V_BFREV_B32_e64: {
1358 return MI.getOperand(0).getReg() == Reg;
1363 case AMDGPU::S_NOT_B32:
1364 case AMDGPU::V_NOT_B32_e32:
1365 case AMDGPU::V_NOT_B32_e64: {
1368 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1369 return MI.getOperand(0).getReg() == Reg;
1379std::optional<int64_t>
1384 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1385 return std::nullopt;
1388 if (Def && Def->isMoveImmediate()) {
1394 return std::nullopt;
1399 if (RI.isAGPRClass(DstRC))
1400 return AMDGPU::COPY;
1401 if (RI.getRegSizeInBits(*DstRC) == 16) {
1404 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1406 if (RI.getRegSizeInBits(*DstRC) == 32)
1407 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1408 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1409 return AMDGPU::S_MOV_B64;
1410 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1411 return AMDGPU::V_MOV_B64_PSEUDO;
1412 return AMDGPU::COPY;
1417 bool IsIndirectSrc)
const {
1418 if (IsIndirectSrc) {
1420 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1422 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1424 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1426 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1428 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1430 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1432 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1434 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1436 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1438 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1440 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1442 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1444 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1445 if (VecSize <= 1024)
1446 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1452 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1454 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1456 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1458 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1460 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1462 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1464 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1466 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1468 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1470 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1472 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1474 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1476 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1477 if (VecSize <= 1024)
1478 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1485 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1487 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1489 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1491 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1493 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1495 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1497 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1499 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1501 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1503 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1505 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1507 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1509 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1510 if (VecSize <= 1024)
1511 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1518 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1520 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1522 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1524 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1526 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1528 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1530 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1532 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1534 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1536 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1538 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1540 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1542 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1543 if (VecSize <= 1024)
1544 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1551 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1553 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1555 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1557 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1558 if (VecSize <= 1024)
1559 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1566 bool IsSGPR)
const {
1578 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1585 return AMDGPU::SI_SPILL_S32_SAVE;
1587 return AMDGPU::SI_SPILL_S64_SAVE;
1589 return AMDGPU::SI_SPILL_S96_SAVE;
1591 return AMDGPU::SI_SPILL_S128_SAVE;
1593 return AMDGPU::SI_SPILL_S160_SAVE;
1595 return AMDGPU::SI_SPILL_S192_SAVE;
1597 return AMDGPU::SI_SPILL_S224_SAVE;
1599 return AMDGPU::SI_SPILL_S256_SAVE;
1601 return AMDGPU::SI_SPILL_S288_SAVE;
1603 return AMDGPU::SI_SPILL_S320_SAVE;
1605 return AMDGPU::SI_SPILL_S352_SAVE;
1607 return AMDGPU::SI_SPILL_S384_SAVE;
1609 return AMDGPU::SI_SPILL_S512_SAVE;
1611 return AMDGPU::SI_SPILL_S1024_SAVE;
1620 return AMDGPU::SI_SPILL_V16_SAVE;
1622 return AMDGPU::SI_SPILL_V32_SAVE;
1624 return AMDGPU::SI_SPILL_V64_SAVE;
1626 return AMDGPU::SI_SPILL_V96_SAVE;
1628 return AMDGPU::SI_SPILL_V128_SAVE;
1630 return AMDGPU::SI_SPILL_V160_SAVE;
1632 return AMDGPU::SI_SPILL_V192_SAVE;
1634 return AMDGPU::SI_SPILL_V224_SAVE;
1636 return AMDGPU::SI_SPILL_V256_SAVE;
1638 return AMDGPU::SI_SPILL_V288_SAVE;
1640 return AMDGPU::SI_SPILL_V320_SAVE;
1642 return AMDGPU::SI_SPILL_V352_SAVE;
1644 return AMDGPU::SI_SPILL_V384_SAVE;
1646 return AMDGPU::SI_SPILL_V512_SAVE;
1648 return AMDGPU::SI_SPILL_V1024_SAVE;
1657 return AMDGPU::SI_SPILL_AV32_SAVE;
1659 return AMDGPU::SI_SPILL_AV64_SAVE;
1661 return AMDGPU::SI_SPILL_AV96_SAVE;
1663 return AMDGPU::SI_SPILL_AV128_SAVE;
1665 return AMDGPU::SI_SPILL_AV160_SAVE;
1667 return AMDGPU::SI_SPILL_AV192_SAVE;
1669 return AMDGPU::SI_SPILL_AV224_SAVE;
1671 return AMDGPU::SI_SPILL_AV256_SAVE;
1673 return AMDGPU::SI_SPILL_AV288_SAVE;
1675 return AMDGPU::SI_SPILL_AV320_SAVE;
1677 return AMDGPU::SI_SPILL_AV352_SAVE;
1679 return AMDGPU::SI_SPILL_AV384_SAVE;
1681 return AMDGPU::SI_SPILL_AV512_SAVE;
1683 return AMDGPU::SI_SPILL_AV1024_SAVE;
1690 bool IsVectorSuperClass) {
1695 if (IsVectorSuperClass)
1696 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1698 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1704 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1711 if (ST.hasMAIInsts())
1730 FrameInfo.getObjectAlign(FrameIndex));
1731 unsigned SpillSize = RI.getSpillSize(*RC);
1734 if (RI.isSGPRClass(RC)) {
1736 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1737 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1738 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1746 if (SrcReg.
isVirtual() && SpillSize == 4) {
1756 if (RI.spillSGPRToVGPR())
1776 return AMDGPU::SI_SPILL_S32_RESTORE;
1778 return AMDGPU::SI_SPILL_S64_RESTORE;
1780 return AMDGPU::SI_SPILL_S96_RESTORE;
1782 return AMDGPU::SI_SPILL_S128_RESTORE;
1784 return AMDGPU::SI_SPILL_S160_RESTORE;
1786 return AMDGPU::SI_SPILL_S192_RESTORE;
1788 return AMDGPU::SI_SPILL_S224_RESTORE;
1790 return AMDGPU::SI_SPILL_S256_RESTORE;
1792 return AMDGPU::SI_SPILL_S288_RESTORE;
1794 return AMDGPU::SI_SPILL_S320_RESTORE;
1796 return AMDGPU::SI_SPILL_S352_RESTORE;
1798 return AMDGPU::SI_SPILL_S384_RESTORE;
1800 return AMDGPU::SI_SPILL_S512_RESTORE;
1802 return AMDGPU::SI_SPILL_S1024_RESTORE;
1811 return AMDGPU::SI_SPILL_V16_RESTORE;
1813 return AMDGPU::SI_SPILL_V32_RESTORE;
1815 return AMDGPU::SI_SPILL_V64_RESTORE;
1817 return AMDGPU::SI_SPILL_V96_RESTORE;
1819 return AMDGPU::SI_SPILL_V128_RESTORE;
1821 return AMDGPU::SI_SPILL_V160_RESTORE;
1823 return AMDGPU::SI_SPILL_V192_RESTORE;
1825 return AMDGPU::SI_SPILL_V224_RESTORE;
1827 return AMDGPU::SI_SPILL_V256_RESTORE;
1829 return AMDGPU::SI_SPILL_V288_RESTORE;
1831 return AMDGPU::SI_SPILL_V320_RESTORE;
1833 return AMDGPU::SI_SPILL_V352_RESTORE;
1835 return AMDGPU::SI_SPILL_V384_RESTORE;
1837 return AMDGPU::SI_SPILL_V512_RESTORE;
1839 return AMDGPU::SI_SPILL_V1024_RESTORE;
1848 return AMDGPU::SI_SPILL_AV32_RESTORE;
1850 return AMDGPU::SI_SPILL_AV64_RESTORE;
1852 return AMDGPU::SI_SPILL_AV96_RESTORE;
1854 return AMDGPU::SI_SPILL_AV128_RESTORE;
1856 return AMDGPU::SI_SPILL_AV160_RESTORE;
1858 return AMDGPU::SI_SPILL_AV192_RESTORE;
1860 return AMDGPU::SI_SPILL_AV224_RESTORE;
1862 return AMDGPU::SI_SPILL_AV256_RESTORE;
1864 return AMDGPU::SI_SPILL_AV288_RESTORE;
1866 return AMDGPU::SI_SPILL_AV320_RESTORE;
1868 return AMDGPU::SI_SPILL_AV352_RESTORE;
1870 return AMDGPU::SI_SPILL_AV384_RESTORE;
1872 return AMDGPU::SI_SPILL_AV512_RESTORE;
1874 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1881 bool IsVectorSuperClass) {
1886 if (IsVectorSuperClass)
1887 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1889 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1895 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1902 if (ST.hasMAIInsts())
1905 assert(!RI.isAGPRClass(RC));
1919 unsigned SpillSize = RI.getSpillSize(*RC);
1926 FrameInfo.getObjectAlign(FrameIndex));
1928 if (RI.isSGPRClass(RC)) {
1930 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1931 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1932 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1937 if (DestReg.
isVirtual() && SpillSize == 4) {
1942 if (RI.spillSGPRToVGPR())
1968 unsigned Quantity)
const {
1970 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1971 while (Quantity > 0) {
1972 unsigned Arg = std::min(Quantity, MaxSNopCount);
1979 auto *MF =
MBB.getParent();
1982 assert(Info->isEntryFunction());
1984 if (
MBB.succ_empty()) {
1985 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1986 if (HasNoTerminator) {
1987 if (Info->returnsVoid()) {
2001 constexpr unsigned DoorbellIDMask = 0x3ff;
2002 constexpr unsigned ECQueueWaveAbort = 0x400;
2007 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
2008 MBB.splitAt(
MI,
false);
2012 MBB.addSuccessor(TrapBB);
2022 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2026 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2031 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2032 .
addUse(DoorbellRegMasked)
2033 .
addImm(ECQueueWaveAbort);
2034 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2035 .
addUse(SetWaveAbortBit);
2038 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2049 return MBB.getNextNode();
2053 switch (
MI.getOpcode()) {
2055 if (
MI.isMetaInstruction())
2060 return MI.getOperand(0).getImm() + 1;
2070 switch (
MI.getOpcode()) {
2072 case AMDGPU::S_MOV_B64_term:
2075 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2078 case AMDGPU::S_MOV_B32_term:
2081 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2084 case AMDGPU::S_XOR_B64_term:
2087 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2090 case AMDGPU::S_XOR_B32_term:
2093 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2095 case AMDGPU::S_OR_B64_term:
2098 MI.setDesc(
get(AMDGPU::S_OR_B64));
2100 case AMDGPU::S_OR_B32_term:
2103 MI.setDesc(
get(AMDGPU::S_OR_B32));
2106 case AMDGPU::S_ANDN2_B64_term:
2109 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2112 case AMDGPU::S_ANDN2_B32_term:
2115 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2118 case AMDGPU::S_AND_B64_term:
2121 MI.setDesc(
get(AMDGPU::S_AND_B64));
2124 case AMDGPU::S_AND_B32_term:
2127 MI.setDesc(
get(AMDGPU::S_AND_B32));
2130 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2133 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2136 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2139 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2142 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2143 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2146 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2147 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2149 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2153 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2156 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2159 int64_t Imm =
MI.getOperand(1).getImm();
2161 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2162 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2169 MI.eraseFromParent();
2175 case AMDGPU::V_MOV_B64_PSEUDO: {
2177 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2178 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2186 if (ST.hasMovB64() && Mov64RC->
contains(Dst)) {
2187 MI.setDesc(Mov64Desc);
2192 if (
SrcOp.isImm()) {
2194 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2195 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2221 if (ST.hasPkMovB32() &&
2242 MI.eraseFromParent();
2245 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2249 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2253 if (ST.has64BitLiterals()) {
2254 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2260 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2265 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2266 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2268 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2269 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2276 MI.eraseFromParent();
2279 case AMDGPU::V_SET_INACTIVE_B32: {
2283 .
add(
MI.getOperand(3))
2284 .
add(
MI.getOperand(4))
2285 .
add(
MI.getOperand(1))
2286 .
add(
MI.getOperand(2))
2287 .
add(
MI.getOperand(5));
2288 MI.eraseFromParent();
2291 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2292 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2293 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2294 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2298 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2299 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2300 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2301 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2302 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2303 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2304 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2305 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2306 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2317 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2318 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2319 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2320 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2321 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2322 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2323 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2327 if (RI.hasVGPRs(EltRC)) {
2328 Opc = AMDGPU::V_MOVRELD_B32_e32;
2330 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2331 : AMDGPU::S_MOVRELD_B32;
2336 bool IsUndef =
MI.getOperand(1).isUndef();
2337 unsigned SubReg =
MI.getOperand(3).getImm();
2338 assert(VecReg ==
MI.getOperand(1).getReg());
2343 .
add(
MI.getOperand(2))
2347 const int ImpDefIdx =
2349 const int ImpUseIdx = ImpDefIdx + 1;
2351 MI.eraseFromParent();
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2361 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2362 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2363 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2364 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2365 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2366 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2367 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2368 assert(ST.useVGPRIndexMode());
2370 bool IsUndef =
MI.getOperand(1).isUndef();
2379 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2383 .
add(
MI.getOperand(2))
2387 const int ImpDefIdx =
2389 const int ImpUseIdx = ImpDefIdx + 1;
2396 MI.eraseFromParent();
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2406 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2407 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2408 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2409 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2410 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2411 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2412 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2413 assert(ST.useVGPRIndexMode());
2416 bool IsUndef =
MI.getOperand(1).isUndef();
2420 .
add(
MI.getOperand(2))
2433 MI.eraseFromParent();
2436 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2439 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2440 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2459 if (ST.hasGetPCZeroExtension()) {
2463 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2470 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2480 MI.eraseFromParent();
2483 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2493 Op.setOffset(
Op.getOffset() + 4);
2495 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2499 MI.eraseFromParent();
2502 case AMDGPU::ENTER_STRICT_WWM: {
2508 case AMDGPU::ENTER_STRICT_WQM: {
2515 MI.eraseFromParent();
2518 case AMDGPU::EXIT_STRICT_WWM:
2519 case AMDGPU::EXIT_STRICT_WQM: {
2525 case AMDGPU::SI_RETURN: {
2539 MI.eraseFromParent();
2543 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2544 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2545 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2548 case AMDGPU::S_GETPC_B64_pseudo:
2549 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2550 if (ST.hasGetPCZeroExtension()) {
2552 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2561 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2562 assert(ST.hasBF16PackedInsts());
2563 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2574 case AMDGPU::GET_STACK_BASE:
2577 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2584 Register DestReg =
MI.getOperand(0).getReg();
2594 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2595 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2596 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2599 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2602 MI.getNumExplicitOperands());
2620 case AMDGPU::S_LOAD_DWORDX16_IMM:
2621 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2634 for (
auto &CandMO :
I->operands()) {
2635 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2643 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2647 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2653 unsigned NewOpcode = -1;
2654 if (SubregSize == 256)
2655 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2656 else if (SubregSize == 128)
2657 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2667 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2672 MI->getOperand(0).setReg(DestReg);
2673 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2677 OffsetMO->
setImm(FinalOffset);
2683 MI->setMemRefs(*MF, NewMMOs);
2696std::pair<MachineInstr*, MachineInstr*>
2698 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2700 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2703 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2704 return std::pair(&
MI,
nullptr);
2715 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2717 if (Dst.isPhysical()) {
2718 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2725 for (
unsigned I = 1;
I <= 2; ++
I) {
2728 if (
SrcOp.isImm()) {
2730 Imm.ashrInPlace(Part * 32);
2731 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2735 if (Src.isPhysical())
2736 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2743 MovDPP.addImm(MO.getImm());
2745 Split[Part] = MovDPP;
2749 if (Dst.isVirtual())
2756 MI.eraseFromParent();
2757 return std::pair(Split[0], Split[1]);
2760std::optional<DestSourcePair>
2762 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2765 return std::nullopt;
2769 AMDGPU::OpName Src0OpName,
2771 AMDGPU::OpName Src1OpName)
const {
2778 "All commutable instructions have both src0 and src1 modifiers");
2780 int Src0ModsVal = Src0Mods->
getImm();
2781 int Src1ModsVal = Src1Mods->
getImm();
2783 Src1Mods->
setImm(Src0ModsVal);
2784 Src0Mods->
setImm(Src1ModsVal);
2793 bool IsKill = RegOp.
isKill();
2795 bool IsUndef = RegOp.
isUndef();
2796 bool IsDebug = RegOp.
isDebug();
2798 if (NonRegOp.
isImm())
2800 else if (NonRegOp.
isFI())
2821 int64_t NonRegVal = NonRegOp1.
getImm();
2824 NonRegOp2.
setImm(NonRegVal);
2831 unsigned OpIdx1)
const {
2836 unsigned Opc =
MI.getOpcode();
2837 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2847 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2850 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2855 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2861 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2876 unsigned Src1Idx)
const {
2877 assert(!NewMI &&
"this should never be used");
2879 unsigned Opc =
MI.getOpcode();
2881 if (CommutedOpcode == -1)
2884 if (Src0Idx > Src1Idx)
2887 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2888 static_cast<int>(Src0Idx) &&
2889 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2890 static_cast<int>(Src1Idx) &&
2891 "inconsistency with findCommutedOpIndices");
2916 Src1, AMDGPU::OpName::src1_modifiers);
2919 AMDGPU::OpName::src1_sel);
2931 unsigned &SrcOpIdx0,
2932 unsigned &SrcOpIdx1)
const {
2937 unsigned &SrcOpIdx0,
2938 unsigned &SrcOpIdx1)
const {
2939 if (!
Desc.isCommutable())
2942 unsigned Opc =
Desc.getOpcode();
2943 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2947 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2951 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2955 int64_t BrOffset)
const {
2972 return MI.getOperand(0).getMBB();
2977 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
2978 MI.getOpcode() == AMDGPU::SI_LOOP)
2990 "new block should be inserted for expanding unconditional branch");
2993 "restore block should be inserted for restoring clobbered registers");
3001 if (ST.useAddPC64Inst()) {
3003 MCCtx.createTempSymbol(
"offset",
true);
3007 MCCtx.createTempSymbol(
"post_addpc",
true);
3008 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3012 Offset->setVariableValue(OffsetExpr);
3016 assert(RS &&
"RegScavenger required for long branching");
3024 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3025 ST.hasVALUReadSGPRHazard();
3026 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3027 if (FlushSGPRWrites)
3035 ApplyHazardWorkarounds();
3038 MCCtx.createTempSymbol(
"post_getpc",
true);
3042 MCCtx.createTempSymbol(
"offset_lo",
true);
3044 MCCtx.createTempSymbol(
"offset_hi",
true);
3047 .
addReg(PCReg, {}, AMDGPU::sub0)
3051 .
addReg(PCReg, {}, AMDGPU::sub1)
3053 ApplyHazardWorkarounds();
3094 if (LongBranchReservedReg) {
3095 RS->enterBasicBlock(
MBB);
3096 Scav = LongBranchReservedReg;
3098 RS->enterBasicBlockEnd(
MBB);
3099 Scav = RS->scavengeRegisterBackwards(
3104 RS->setRegUsed(Scav);
3112 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3129unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3131 case SIInstrInfo::SCC_TRUE:
3132 return AMDGPU::S_CBRANCH_SCC1;
3133 case SIInstrInfo::SCC_FALSE:
3134 return AMDGPU::S_CBRANCH_SCC0;
3135 case SIInstrInfo::VCCNZ:
3136 return AMDGPU::S_CBRANCH_VCCNZ;
3137 case SIInstrInfo::VCCZ:
3138 return AMDGPU::S_CBRANCH_VCCZ;
3139 case SIInstrInfo::EXECNZ:
3140 return AMDGPU::S_CBRANCH_EXECNZ;
3141 case SIInstrInfo::EXECZ:
3142 return AMDGPU::S_CBRANCH_EXECZ;
3148SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3150 case AMDGPU::S_CBRANCH_SCC0:
3152 case AMDGPU::S_CBRANCH_SCC1:
3154 case AMDGPU::S_CBRANCH_VCCNZ:
3156 case AMDGPU::S_CBRANCH_VCCZ:
3158 case AMDGPU::S_CBRANCH_EXECNZ:
3160 case AMDGPU::S_CBRANCH_EXECZ:
3172 bool AllowModify)
const {
3173 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3175 TBB =
I->getOperand(0).getMBB();
3179 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3180 if (Pred == INVALID_BR)
3185 Cond.push_back(
I->getOperand(1));
3189 if (
I ==
MBB.end()) {
3195 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3197 FBB =
I->getOperand(0).getMBB();
3207 bool AllowModify)
const {
3215 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3216 switch (
I->getOpcode()) {
3217 case AMDGPU::S_MOV_B64_term:
3218 case AMDGPU::S_XOR_B64_term:
3219 case AMDGPU::S_OR_B64_term:
3220 case AMDGPU::S_ANDN2_B64_term:
3221 case AMDGPU::S_AND_B64_term:
3222 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3223 case AMDGPU::S_MOV_B32_term:
3224 case AMDGPU::S_XOR_B32_term:
3225 case AMDGPU::S_OR_B32_term:
3226 case AMDGPU::S_ANDN2_B32_term:
3227 case AMDGPU::S_AND_B32_term:
3228 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3231 case AMDGPU::SI_ELSE:
3232 case AMDGPU::SI_KILL_I1_TERMINATOR:
3233 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3250 int *BytesRemoved)
const {
3252 unsigned RemovedSize = 0;
3255 if (
MI.isBranch() ||
MI.isReturn()) {
3257 MI.eraseFromParent();
3263 *BytesRemoved = RemovedSize;
3280 int *BytesAdded)
const {
3281 if (!FBB &&
Cond.empty()) {
3285 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3292 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3304 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3322 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3329 if (
Cond.size() != 2) {
3333 if (
Cond[0].isImm()) {
3344 Register FalseReg,
int &CondCycles,
3345 int &TrueCycles,
int &FalseCycles)
const {
3355 CondCycles = TrueCycles = FalseCycles = NumInsts;
3358 return RI.hasVGPRs(RC) && NumInsts <= 6;
3372 if (NumInsts % 2 == 0)
3375 CondCycles = TrueCycles = FalseCycles = NumInsts;
3376 return RI.isSGPRClass(RC);
3387 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3388 if (Pred == VCCZ || Pred == SCC_FALSE) {
3389 Pred =
static_cast<BranchPredicate
>(-Pred);
3395 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3397 if (DstSize == 32) {
3399 if (Pred == SCC_TRUE) {
3414 if (DstSize == 64 && Pred == SCC_TRUE) {
3424 static const int16_t Sub0_15[] = {
3425 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3426 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3427 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3428 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3431 static const int16_t Sub0_15_64[] = {
3432 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3433 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3434 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3435 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3438 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3440 const int16_t *SubIndices = Sub0_15;
3441 int NElts = DstSize / 32;
3445 if (Pred == SCC_TRUE) {
3447 SelOp = AMDGPU::S_CSELECT_B32;
3448 EltRC = &AMDGPU::SGPR_32RegClass;
3450 SelOp = AMDGPU::S_CSELECT_B64;
3451 EltRC = &AMDGPU::SGPR_64RegClass;
3452 SubIndices = Sub0_15_64;
3458 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3463 for (
int Idx = 0; Idx != NElts; ++Idx) {
3467 unsigned SubIdx = SubIndices[Idx];
3470 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3472 .
addReg(FalseReg, {}, SubIdx)
3473 .addReg(TrueReg, {}, SubIdx);
3476 .
addReg(TrueReg, {}, SubIdx)
3477 .addReg(FalseReg, {}, SubIdx);
3489 switch (
MI.getOpcode()) {
3490 case AMDGPU::V_MOV_B16_t16_e32:
3491 case AMDGPU::V_MOV_B16_t16_e64:
3492 case AMDGPU::V_MOV_B32_e32:
3493 case AMDGPU::V_MOV_B32_e64:
3494 case AMDGPU::V_MOV_B64_PSEUDO:
3495 case AMDGPU::V_MOV_B64_e32:
3496 case AMDGPU::V_MOV_B64_e64:
3497 case AMDGPU::S_MOV_B32:
3498 case AMDGPU::S_MOV_B64:
3499 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3501 case AMDGPU::WWM_COPY:
3502 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3503 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3504 case AMDGPU::V_ACCVGPR_MOV_B32:
3505 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3506 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3514 switch (
MI.getOpcode()) {
3515 case AMDGPU::V_MOV_B16_t16_e32:
3516 case AMDGPU::V_MOV_B16_t16_e64:
3518 case AMDGPU::V_MOV_B32_e32:
3519 case AMDGPU::V_MOV_B32_e64:
3520 case AMDGPU::V_MOV_B64_PSEUDO:
3521 case AMDGPU::V_MOV_B64_e32:
3522 case AMDGPU::V_MOV_B64_e64:
3523 case AMDGPU::S_MOV_B32:
3524 case AMDGPU::S_MOV_B64:
3525 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3527 case AMDGPU::WWM_COPY:
3528 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3529 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3530 case AMDGPU::V_ACCVGPR_MOV_B32:
3531 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3532 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3540 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3541 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3542 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3545 unsigned Opc =
MI.getOpcode();
3547 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3549 MI.removeOperand(Idx);
3555 MI.setDesc(NewDesc);
3561 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3562 Desc.implicit_defs().size();
3564 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3565 MI.removeOperand(
I);
3569 unsigned SubRegIndex) {
3570 switch (SubRegIndex) {
3571 case AMDGPU::NoSubRegister:
3581 case AMDGPU::sub1_lo16:
3583 case AMDGPU::sub1_hi16:
3586 return std::nullopt;
3594 case AMDGPU::V_MAC_F16_e32:
3595 case AMDGPU::V_MAC_F16_e64:
3596 case AMDGPU::V_MAD_F16_e64:
3597 return AMDGPU::V_MADAK_F16;
3598 case AMDGPU::V_MAC_F32_e32:
3599 case AMDGPU::V_MAC_F32_e64:
3600 case AMDGPU::V_MAD_F32_e64:
3601 return AMDGPU::V_MADAK_F32;
3602 case AMDGPU::V_FMAC_F32_e32:
3603 case AMDGPU::V_FMAC_F32_e64:
3604 case AMDGPU::V_FMA_F32_e64:
3605 return AMDGPU::V_FMAAK_F32;
3606 case AMDGPU::V_FMAC_F16_e32:
3607 case AMDGPU::V_FMAC_F16_e64:
3608 case AMDGPU::V_FMAC_F16_t16_e64:
3609 case AMDGPU::V_FMAC_F16_fake16_e64:
3610 case AMDGPU::V_FMAC_F16_t16_e32:
3611 case AMDGPU::V_FMAC_F16_fake16_e32:
3612 case AMDGPU::V_FMA_F16_e64:
3613 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3614 ? AMDGPU::V_FMAAK_F16_t16
3615 : AMDGPU::V_FMAAK_F16_fake16
3616 : AMDGPU::V_FMAAK_F16;
3617 case AMDGPU::V_FMAC_F64_e32:
3618 case AMDGPU::V_FMAC_F64_e64:
3619 case AMDGPU::V_FMA_F64_e64:
3620 return AMDGPU::V_FMAAK_F64;
3628 case AMDGPU::V_MAC_F16_e32:
3629 case AMDGPU::V_MAC_F16_e64:
3630 case AMDGPU::V_MAD_F16_e64:
3631 return AMDGPU::V_MADMK_F16;
3632 case AMDGPU::V_MAC_F32_e32:
3633 case AMDGPU::V_MAC_F32_e64:
3634 case AMDGPU::V_MAD_F32_e64:
3635 return AMDGPU::V_MADMK_F32;
3636 case AMDGPU::V_FMAC_F32_e32:
3637 case AMDGPU::V_FMAC_F32_e64:
3638 case AMDGPU::V_FMA_F32_e64:
3639 return AMDGPU::V_FMAMK_F32;
3640 case AMDGPU::V_FMAC_F16_e32:
3641 case AMDGPU::V_FMAC_F16_e64:
3642 case AMDGPU::V_FMAC_F16_t16_e64:
3643 case AMDGPU::V_FMAC_F16_fake16_e64:
3644 case AMDGPU::V_FMAC_F16_t16_e32:
3645 case AMDGPU::V_FMAC_F16_fake16_e32:
3646 case AMDGPU::V_FMA_F16_e64:
3647 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3648 ? AMDGPU::V_FMAMK_F16_t16
3649 : AMDGPU::V_FMAMK_F16_fake16
3650 : AMDGPU::V_FMAMK_F16;
3651 case AMDGPU::V_FMAC_F64_e32:
3652 case AMDGPU::V_FMAC_F64_e64:
3653 case AMDGPU::V_FMA_F64_e64:
3654 return AMDGPU::V_FMAMK_F64;
3668 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3671 if (
Opc == AMDGPU::COPY) {
3672 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3679 if (HasMultipleUses) {
3682 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3685 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3693 if (ImmDefSize == 32 &&
3698 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3699 RI.getSubRegIdxSize(UseSubReg) == 16;
3702 if (RI.hasVGPRs(DstRC))
3705 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3711 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3718 for (
unsigned MovOp :
3719 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3720 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3728 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3732 if (MovDstPhysReg) {
3736 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3743 if (MovDstPhysReg) {
3744 if (!MovDstRC->
contains(MovDstPhysReg))
3760 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3768 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3772 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3774 UseMI.getOperand(0).setReg(MovDstPhysReg);
3779 UseMI.setDesc(NewMCID);
3780 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3781 UseMI.addImplicitDefUseOperands(*MF);
3785 if (HasMultipleUses)
3788 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3789 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3790 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3791 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3792 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3793 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3794 Opc == AMDGPU::V_FMAC_F64_e64) {
3803 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3814 auto CopyRegOperandToNarrowerRC =
3817 if (!
MI.getOperand(OpNo).isReg())
3821 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3824 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3825 get(AMDGPU::COPY), Tmp)
3827 MI.getOperand(OpNo).setReg(Tmp);
3828 MI.getOperand(OpNo).setIsKill();
3835 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3836 if (!RegSrc->
isReg())
3839 ST.getConstantBusLimit(
Opc) < 2)
3854 if (Def && Def->isMoveImmediate() &&
3869 unsigned SrcSubReg = RegSrc->
getSubReg();
3874 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3875 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3876 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3877 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3878 UseMI.untieRegOperand(
3879 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3886 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3887 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3891 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3892 UseMI.getOperand(0).getReg())
3894 UseMI.getOperand(0).setReg(Tmp);
3895 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3896 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3901 DefMI.eraseFromParent();
3908 if (ST.getConstantBusLimit(
Opc) < 2) {
3911 bool Src0Inlined =
false;
3912 if (Src0->
isReg()) {
3917 if (Def && Def->isMoveImmediate() &&
3922 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3923 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3929 if (Src1->
isReg() && !Src0Inlined) {
3932 if (Def && Def->isMoveImmediate() &&
3936 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3949 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3950 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3951 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3952 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3953 UseMI.untieRegOperand(
3954 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3956 const std::optional<int64_t> SubRegImm =
3966 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3967 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
3971 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3972 UseMI.getOperand(0).getReg())
3974 UseMI.getOperand(0).setReg(Tmp);
3975 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3976 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
3986 DefMI.eraseFromParent();
3998 if (BaseOps1.
size() != BaseOps2.
size())
4000 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4001 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4009 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4010 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4011 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4013 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4016bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4019 int64_t Offset0, Offset1;
4022 bool Offset0IsScalable, Offset1IsScalable;
4036 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4037 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4044 "MIa must load from or modify a memory location");
4046 "MIb must load from or modify a memory location");
4068 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4075 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4085 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4099 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4110 if (
Reg.isPhysical())
4114 Imm = Def->getOperand(1).getImm();
4134 unsigned NumOps =
MI.getNumOperands();
4137 if (
Op.isReg() &&
Op.isKill())
4145 case AMDGPU::V_MAC_F16_e32:
4146 case AMDGPU::V_MAC_F16_e64:
4147 return AMDGPU::V_MAD_F16_e64;
4148 case AMDGPU::V_MAC_F32_e32:
4149 case AMDGPU::V_MAC_F32_e64:
4150 return AMDGPU::V_MAD_F32_e64;
4151 case AMDGPU::V_MAC_LEGACY_F32_e32:
4152 case AMDGPU::V_MAC_LEGACY_F32_e64:
4153 return AMDGPU::V_MAD_LEGACY_F32_e64;
4154 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4155 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4156 return AMDGPU::V_FMA_LEGACY_F32_e64;
4157 case AMDGPU::V_FMAC_F16_e32:
4158 case AMDGPU::V_FMAC_F16_e64:
4159 case AMDGPU::V_FMAC_F16_t16_e64:
4160 case AMDGPU::V_FMAC_F16_fake16_e64:
4161 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4162 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4163 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4164 : AMDGPU::V_FMA_F16_gfx9_e64;
4165 case AMDGPU::V_FMAC_F32_e32:
4166 case AMDGPU::V_FMAC_F32_e64:
4167 return AMDGPU::V_FMA_F32_e64;
4168 case AMDGPU::V_FMAC_F64_e32:
4169 case AMDGPU::V_FMAC_F64_e64:
4170 return AMDGPU::V_FMA_F64_e64;
4190 if (
MI.isBundle()) {
4193 if (
MI.getBundleSize() != 1)
4195 CandidateMI =
MI.getNextNode();
4199 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4203 if (
MI.isBundle()) {
4208 MI.untieRegOperand(MO.getOperandNo());
4216 if (Def.isEarlyClobber() && Def.isReg() &&
4221 auto UpdateDefIndex = [&](
LiveRange &LR) {
4222 auto *S = LR.find(OldIndex);
4223 if (S != LR.end() && S->start == OldIndex) {
4224 assert(S->valno && S->valno->def == OldIndex);
4225 S->start = NewIndex;
4226 S->valno->def = NewIndex;
4230 for (
auto &SR : LI.subranges())
4236 if (U.RemoveMIUse) {
4239 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4243 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4244 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4245 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4246 U.RemoveMIUse->removeOperand(
I);
4251 if (
MI.isBundle()) {
4255 if (MO.isReg() && MO.getReg() == DefReg) {
4256 assert(MO.getSubReg() == 0 &&
4257 "tied sub-registers in bundles currently not supported");
4258 MI.removeOperand(MO.getOperandNo());
4275 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4276 MIOp.setIsUndef(
true);
4277 MIOp.setReg(DummyReg);
4281 if (
MI.isBundle()) {
4285 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4286 MIOp.setIsUndef(
true);
4287 MIOp.setReg(DummyReg);
4300 return MI.isBundle() ? &
MI : NewMI;
4305 ThreeAddressUpdates &U)
const {
4307 unsigned Opc =
MI.getOpcode();
4311 if (NewMFMAOpc != -1) {
4314 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4315 MIB.
add(
MI.getOperand(
I));
4323 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4328 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4329 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4330 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4334 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4335 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4336 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4337 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4338 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4339 bool Src0Literal =
false;
4344 case AMDGPU::V_MAC_F16_e64:
4345 case AMDGPU::V_FMAC_F16_e64:
4346 case AMDGPU::V_FMAC_F16_t16_e64:
4347 case AMDGPU::V_FMAC_F16_fake16_e64:
4348 case AMDGPU::V_MAC_F32_e64:
4349 case AMDGPU::V_MAC_LEGACY_F32_e64:
4350 case AMDGPU::V_FMAC_F32_e64:
4351 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4352 case AMDGPU::V_FMAC_F64_e64:
4354 case AMDGPU::V_MAC_F16_e32:
4355 case AMDGPU::V_FMAC_F16_e32:
4356 case AMDGPU::V_MAC_F32_e32:
4357 case AMDGPU::V_MAC_LEGACY_F32_e32:
4358 case AMDGPU::V_FMAC_F32_e32:
4359 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4360 case AMDGPU::V_FMAC_F64_e32: {
4361 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4362 AMDGPU::OpName::src0);
4363 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4374 MachineInstrBuilder MIB;
4377 const MachineOperand *Src0Mods =
4380 const MachineOperand *Src1Mods =
4383 const MachineOperand *Src2Mods =
4389 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4390 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4392 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4394 MachineInstr *
DefMI;
4430 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4446 if (Src0Literal && !ST.hasVOP3Literal())
4474 switch (
MI.getOpcode()) {
4475 case AMDGPU::S_SET_GPR_IDX_ON:
4476 case AMDGPU::S_SET_GPR_IDX_MODE:
4477 case AMDGPU::S_SET_GPR_IDX_OFF:
4495 if (
MI.isTerminator() ||
MI.isPosition())
4499 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4502 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4508 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4509 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4510 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4511 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4512 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4517 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4518 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4519 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4533 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4538 if (
MI.memoperands_empty())
4543 unsigned AS = Memop->getAddrSpace();
4544 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4545 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4546 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4547 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4562 if (
MI.memoperands_empty())
4571 unsigned AS = Memop->getAddrSpace();
4588 if (ST.isTgSplitEnabled())
4593 if (
MI.memoperands_empty())
4598 unsigned AS = Memop->getAddrSpace();
4614 unsigned Opcode =
MI.getOpcode();
4629 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4630 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4631 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4632 Opcode == AMDGPU::S_SETHALT)
4635 if (
MI.isCall() ||
MI.isInlineAsm())
4651 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4652 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4653 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4654 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4662 if (
MI.isMetaInstruction())
4666 if (
MI.isCopyLike()) {
4667 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4671 return MI.readsRegister(AMDGPU::EXEC, &RI);
4682 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4686 switch (Imm.getBitWidth()) {
4692 ST.hasInv2PiInlineImm());
4695 ST.hasInv2PiInlineImm());
4697 return ST.has16BitInsts() &&
4699 ST.hasInv2PiInlineImm());
4706 APInt IntImm = Imm.bitcastToAPInt();
4708 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4716 return ST.has16BitInsts() &&
4719 return ST.has16BitInsts() &&
4729 switch (OperandType) {
4739 int32_t Trunc =
static_cast<int32_t
>(Imm);
4781 int16_t Trunc =
static_cast<int16_t
>(Imm);
4782 return ST.has16BitInsts() &&
4791 int16_t Trunc =
static_cast<int16_t
>(Imm);
4792 return ST.has16BitInsts() &&
4843 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4849 return ST.hasVOP3Literal();
4853 int64_t ImmVal)
const {
4856 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4857 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4858 AMDGPU::OpName::src2))
4860 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4872 "unexpected imm-like operand kind");
4885 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4903 AMDGPU::OpName
OpName)
const {
4905 return Mods && Mods->
getImm();
4918 switch (
MI.getOpcode()) {
4919 default:
return false;
4921 case AMDGPU::V_ADDC_U32_e64:
4922 case AMDGPU::V_SUBB_U32_e64:
4923 case AMDGPU::V_SUBBREV_U32_e64: {
4926 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4931 case AMDGPU::V_MAC_F16_e64:
4932 case AMDGPU::V_MAC_F32_e64:
4933 case AMDGPU::V_MAC_LEGACY_F32_e64:
4934 case AMDGPU::V_FMAC_F16_e64:
4935 case AMDGPU::V_FMAC_F16_t16_e64:
4936 case AMDGPU::V_FMAC_F16_fake16_e64:
4937 case AMDGPU::V_FMAC_F32_e64:
4938 case AMDGPU::V_FMAC_F64_e64:
4939 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4940 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4945 case AMDGPU::V_CNDMASK_B32_e64:
4951 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
4981 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
4990 unsigned Op32)
const {
5004 Inst32.
add(
MI.getOperand(
I));
5008 int Idx =
MI.getNumExplicitDefs();
5010 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5015 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5037 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5045 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5048 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5049 AMDGPU::SReg_64RegClass.contains(Reg);
5077 switch (MO.getReg()) {
5079 case AMDGPU::VCC_LO:
5080 case AMDGPU::VCC_HI:
5082 case AMDGPU::FLAT_SCR:
5095 switch (
MI.getOpcode()) {
5096 case AMDGPU::V_READLANE_B32:
5097 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5098 case AMDGPU::V_WRITELANE_B32:
5099 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5106 if (
MI.isPreISelOpcode() ||
5107 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5125 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5136 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5137 ErrInfo =
"illegal copy from vector register to SGPR";
5155 if (!MRI.
isSSA() &&
MI.isCopy())
5156 return verifyCopy(
MI, MRI, ErrInfo);
5158 if (SIInstrInfo::isGenericOpcode(Opcode))
5161 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5162 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5163 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5165 if (Src0Idx == -1) {
5167 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5168 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5169 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5170 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5175 if (!
Desc.isVariadic() &&
5176 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5177 ErrInfo =
"Instruction has wrong number of operands.";
5181 if (
MI.isInlineAsm()) {
5194 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5195 ErrInfo =
"inlineasm operand has incorrect register class.";
5203 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5204 ErrInfo =
"missing memory operand from image instruction.";
5209 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5212 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5213 "all fp values to integers.";
5218 int16_t RegClass = getOpRegClassID(OpInfo);
5220 switch (OpInfo.OperandType) {
5222 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5223 ErrInfo =
"Illegal immediate value for operand.";
5258 ErrInfo =
"Illegal immediate value for operand.";
5265 ErrInfo =
"Expected inline constant for operand.";
5280 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5281 ErrInfo =
"Expected immediate, but got non-immediate";
5290 if (OpInfo.isGenericType())
5305 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5306 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5308 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5310 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5311 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5318 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5319 ErrInfo =
"Subtarget requires even aligned vector registers";
5324 if (RegClass != -1) {
5325 if (Reg.isVirtual())
5330 ErrInfo =
"Operand has incorrect register class.";
5338 if (!ST.hasSDWA()) {
5339 ErrInfo =
"SDWA is not supported on this target";
5343 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5344 AMDGPU::OpName::dst_sel}) {
5348 int64_t Imm = MO->
getImm();
5350 ErrInfo =
"Invalid SDWA selection";
5355 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5357 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5362 if (!ST.hasSDWAScalar()) {
5364 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5365 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5372 "Only reg allowed as operands in SDWA instructions on GFX9+";
5378 if (!ST.hasSDWAOmod()) {
5381 if (OMod !=
nullptr &&
5383 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5388 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5389 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5390 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5391 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5394 unsigned Mods = Src0ModsMO->
getImm();
5397 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5403 if (
isVOPC(BasicOpcode)) {
5404 if (!ST.hasSDWASdst() && DstIdx != -1) {
5407 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5408 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5411 }
else if (!ST.hasSDWAOutModsVOPC()) {
5414 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5415 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5421 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5422 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5429 if (DstUnused && DstUnused->isImm() &&
5432 if (!Dst.isReg() || !Dst.isTied()) {
5433 ErrInfo =
"Dst register should have tied register";
5438 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5441 "Dst register should be tied to implicit use of preserved register";
5445 ErrInfo =
"Dst register should use same physical register as preserved";
5452 if (
isImage(Opcode) && !
MI.mayStore()) {
5464 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5472 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5476 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5477 if (RegCount > DstSize) {
5478 ErrInfo =
"Image instruction returns too many registers for dst "
5487 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5488 unsigned ConstantBusCount = 0;
5489 bool UsesLiteral =
false;
5492 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5496 LiteralVal = &
MI.getOperand(ImmIdx);
5505 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5516 }
else if (!MO.
isFI()) {
5523 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5533 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5534 return !RI.regsOverlap(SGPRUsed, SGPR);
5543 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5544 Opcode != AMDGPU::V_WRITELANE_B32) {
5545 ErrInfo =
"VOP* instruction violates constant bus restriction";
5549 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5550 ErrInfo =
"VOP3 instruction uses literal";
5557 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5558 unsigned SGPRCount = 0;
5561 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5569 if (MO.
getReg() != SGPRUsed)
5574 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5575 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5582 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5583 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5590 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5600 ErrInfo =
"ABS not allowed in VOP3B instructions";
5613 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5620 if (
Desc.isBranch()) {
5622 ErrInfo =
"invalid branch target for SOPK instruction";
5629 ErrInfo =
"invalid immediate for SOPK instruction";
5634 ErrInfo =
"invalid immediate for SOPK instruction";
5641 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5642 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5643 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5644 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5645 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5646 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5648 const unsigned StaticNumOps =
5649 Desc.getNumOperands() +
Desc.implicit_uses().size();
5650 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5656 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5657 ErrInfo =
"missing implicit register operands";
5663 if (!Dst->isUse()) {
5664 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5669 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5670 UseOpIdx != StaticNumOps + 1) {
5671 ErrInfo =
"movrel implicit operands should be tied";
5678 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5680 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5681 ErrInfo =
"src0 should be subreg of implicit vector use";
5689 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5690 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5696 if (
MI.mayStore() &&
5701 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5702 ErrInfo =
"scalar stores must use m0 as offset register";
5708 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5710 if (
Offset->getImm() != 0) {
5711 ErrInfo =
"subtarget does not support offsets in flat instructions";
5716 if (
isDS(
MI) && !ST.hasGDS()) {
5718 if (GDSOp && GDSOp->
getImm() != 0) {
5719 ErrInfo =
"GDS is not supported on this subtarget";
5727 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5728 AMDGPU::OpName::vaddr0);
5729 AMDGPU::OpName RSrcOpName =
5730 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5731 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5739 ErrInfo =
"dim is out of range";
5744 if (ST.hasR128A16()) {
5746 IsA16 = R128A16->
getImm() != 0;
5747 }
else if (ST.hasA16()) {
5749 IsA16 = A16->
getImm() != 0;
5752 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5754 unsigned AddrWords =
5757 unsigned VAddrWords;
5759 VAddrWords = RsrcIdx - VAddr0Idx;
5760 if (ST.hasPartialNSAEncoding() &&
5762 unsigned LastVAddrIdx = RsrcIdx - 1;
5763 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5771 if (VAddrWords != AddrWords) {
5773 <<
" but got " << VAddrWords <<
"\n");
5774 ErrInfo =
"bad vaddr size";
5784 unsigned DC = DppCt->
getImm();
5785 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5786 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5787 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5788 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5789 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5790 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5791 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5792 ErrInfo =
"Invalid dpp_ctrl value";
5795 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5796 !ST.hasDPPWavefrontShifts()) {
5797 ErrInfo =
"Invalid dpp_ctrl value: "
5798 "wavefront shifts are not supported on GFX10+";
5801 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5802 !ST.hasDPPBroadcasts()) {
5803 ErrInfo =
"Invalid dpp_ctrl value: "
5804 "broadcasts are not supported on GFX10+";
5807 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5809 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5810 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5811 !ST.hasGFX90AInsts()) {
5812 ErrInfo =
"Invalid dpp_ctrl value: "
5813 "row_newbroadcast/row_share is not supported before "
5817 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5818 ErrInfo =
"Invalid dpp_ctrl value: "
5819 "row_share and row_xmask are not supported before GFX10";
5824 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5827 ErrInfo =
"Invalid dpp_ctrl value: "
5828 "DP ALU dpp only support row_newbcast";
5835 AMDGPU::OpName DataName =
5836 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5842 if (ST.hasGFX90AInsts()) {
5843 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5844 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5845 ErrInfo =
"Invalid register class: "
5846 "vdata and vdst should be both VGPR or AGPR";
5849 if (
Data && Data2 &&
5850 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5851 ErrInfo =
"Invalid register class: "
5852 "both data operands should be VGPR or AGPR";
5856 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5857 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5858 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5859 ErrInfo =
"Invalid register class: "
5860 "agpr loads and stores not supported on this GPU";
5866 if (ST.needsAlignedVGPRs()) {
5867 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5872 if (Reg.isPhysical())
5873 return !(RI.getHWRegIndex(Reg) & 1);
5875 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5876 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5879 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5880 Opcode == AMDGPU::DS_GWS_BARRIER) {
5882 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5883 ErrInfo =
"Subtarget requires even aligned vector registers "
5884 "for DS_GWS instructions";
5890 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5891 ErrInfo =
"Subtarget requires even aligned vector registers "
5892 "for vaddr operand of image instructions";
5898 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5900 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5901 ErrInfo =
"Invalid register class: "
5902 "v_accvgpr_write with an SGPR is not supported on this GPU";
5907 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5910 ErrInfo =
"pseudo expects only physical SGPRs";
5917 if (!ST.hasScaleOffset()) {
5918 ErrInfo =
"Subtarget does not support offset scaling";
5922 ErrInfo =
"Instruction does not support offset scaling";
5931 for (
unsigned I = 0;
I < 3; ++
I) {
5937 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5938 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5940 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5941 &AMDGPU::SReg_64RegClass) ||
5942 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5943 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5955 switch (
MI.getOpcode()) {
5956 default:
return AMDGPU::INSTRUCTION_LIST_END;
5957 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5958 case AMDGPU::COPY:
return AMDGPU::COPY;
5959 case AMDGPU::PHI:
return AMDGPU::PHI;
5960 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
5961 case AMDGPU::WQM:
return AMDGPU::WQM;
5962 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
5963 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
5964 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
5965 case AMDGPU::S_MOV_B32: {
5967 return MI.getOperand(1).isReg() ||
5968 RI.isAGPR(MRI,
MI.getOperand(0).getReg()) ?
5969 AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
5971 case AMDGPU::S_ADD_I32:
5972 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
5973 case AMDGPU::S_ADDC_U32:
5974 return AMDGPU::V_ADDC_U32_e32;
5975 case AMDGPU::S_SUB_I32:
5976 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
5979 case AMDGPU::S_ADD_U32:
5980 return AMDGPU::V_ADD_CO_U32_e32;
5981 case AMDGPU::S_SUB_U32:
5982 return AMDGPU::V_SUB_CO_U32_e32;
5983 case AMDGPU::S_ADD_U64_PSEUDO:
5984 return AMDGPU::V_ADD_U64_PSEUDO;
5985 case AMDGPU::S_SUB_U64_PSEUDO:
5986 return AMDGPU::V_SUB_U64_PSEUDO;
5987 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
5988 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
5989 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
5990 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
5991 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
5992 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
5993 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
5994 case AMDGPU::S_XNOR_B32:
5995 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
5996 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
5997 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
5998 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
5999 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6000 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6001 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6002 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6003 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6004 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6005 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6006 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6007 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6008 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6009 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6010 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6011 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
6012 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6013 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6014 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6015 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6016 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6017 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6018 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6019 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6020 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6021 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6022 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6023 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6024 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6025 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6026 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6027 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6028 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6029 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6030 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6031 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
6032 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6033 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6034 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6035 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6036 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6037 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6038 case AMDGPU::S_CVT_F32_F16:
6039 case AMDGPU::S_CVT_HI_F32_F16:
6040 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6041 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6042 case AMDGPU::S_CVT_F16_F32:
6043 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6044 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6045 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6046 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6047 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6048 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6049 case AMDGPU::S_CEIL_F16:
6050 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6051 : AMDGPU::V_CEIL_F16_fake16_e64;
6052 case AMDGPU::S_FLOOR_F16:
6053 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6054 : AMDGPU::V_FLOOR_F16_fake16_e64;
6055 case AMDGPU::S_TRUNC_F16:
6056 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6057 : AMDGPU::V_TRUNC_F16_fake16_e64;
6058 case AMDGPU::S_RNDNE_F16:
6059 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6060 : AMDGPU::V_RNDNE_F16_fake16_e64;
6061 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6062 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6063 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6064 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6065 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6066 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6067 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6068 case AMDGPU::S_ADD_F16:
6069 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6070 : AMDGPU::V_ADD_F16_fake16_e64;
6071 case AMDGPU::S_SUB_F16:
6072 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6073 : AMDGPU::V_SUB_F16_fake16_e64;
6074 case AMDGPU::S_MIN_F16:
6075 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6076 : AMDGPU::V_MIN_F16_fake16_e64;
6077 case AMDGPU::S_MAX_F16:
6078 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6079 : AMDGPU::V_MAX_F16_fake16_e64;
6080 case AMDGPU::S_MINIMUM_F16:
6081 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6082 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6083 case AMDGPU::S_MAXIMUM_F16:
6084 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6085 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6086 case AMDGPU::S_MUL_F16:
6087 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6088 : AMDGPU::V_MUL_F16_fake16_e64;
6089 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6090 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6091 case AMDGPU::S_FMAC_F16:
6092 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6093 : AMDGPU::V_FMAC_F16_fake16_e64;
6094 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6095 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6096 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6097 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6098 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6099 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6100 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6101 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6102 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6103 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6104 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6105 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6106 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6107 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6108 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6109 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6110 case AMDGPU::S_CMP_LT_F16:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6112 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6113 case AMDGPU::S_CMP_EQ_F16:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6115 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6116 case AMDGPU::S_CMP_LE_F16:
6117 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6118 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6119 case AMDGPU::S_CMP_GT_F16:
6120 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6121 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6122 case AMDGPU::S_CMP_LG_F16:
6123 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6124 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6125 case AMDGPU::S_CMP_GE_F16:
6126 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6127 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6128 case AMDGPU::S_CMP_O_F16:
6129 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6130 : AMDGPU::V_CMP_O_F16_fake16_e64;
6131 case AMDGPU::S_CMP_U_F16:
6132 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6133 : AMDGPU::V_CMP_U_F16_fake16_e64;
6134 case AMDGPU::S_CMP_NGE_F16:
6135 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6136 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6137 case AMDGPU::S_CMP_NLG_F16:
6138 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6139 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6140 case AMDGPU::S_CMP_NGT_F16:
6141 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6142 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6143 case AMDGPU::S_CMP_NLE_F16:
6144 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6145 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6146 case AMDGPU::S_CMP_NEQ_F16:
6147 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6148 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6149 case AMDGPU::S_CMP_NLT_F16:
6150 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6151 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6152 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6153 case AMDGPU::V_S_EXP_F16_e64:
6154 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6155 : AMDGPU::V_EXP_F16_fake16_e64;
6156 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6157 case AMDGPU::V_S_LOG_F16_e64:
6158 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6159 : AMDGPU::V_LOG_F16_fake16_e64;
6160 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6161 case AMDGPU::V_S_RCP_F16_e64:
6162 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6163 : AMDGPU::V_RCP_F16_fake16_e64;
6164 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6165 case AMDGPU::V_S_RSQ_F16_e64:
6166 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6167 : AMDGPU::V_RSQ_F16_fake16_e64;
6168 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6169 case AMDGPU::V_S_SQRT_F16_e64:
6170 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6171 : AMDGPU::V_SQRT_F16_fake16_e64;
6174 "Unexpected scalar opcode without corresponding vector one!");
6223 "Not a whole wave func");
6226 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6227 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6234 unsigned OpNo)
const {
6236 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6237 Desc.operands()[OpNo].RegClass == -1) {
6240 if (Reg.isVirtual()) {
6244 return RI.getPhysRegBaseClass(Reg);
6247 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6248 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6256 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6258 unsigned Size = RI.getRegSizeInBits(*RC);
6259 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6260 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6261 : AMDGPU::V_MOV_B32_e32;
6263 Opcode = AMDGPU::COPY;
6264 else if (RI.isSGPRClass(RC))
6265 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6279 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6285 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6296 if (SubIdx == AMDGPU::sub0)
6298 if (SubIdx == AMDGPU::sub1)
6310void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6326 if (Reg.isPhysical())
6336 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6339 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6346 unsigned Opc =
MI.getOpcode();
6352 constexpr AMDGPU::OpName OpNames[] = {
6353 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6356 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6357 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6367 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6368 if (IsAGPR && !ST.hasMAIInsts())
6374 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6375 const int DataIdx = AMDGPU::getNamedOperandIdx(
6376 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6377 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6378 MI.getOperand(DataIdx).isReg() &&
6379 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6381 if ((
int)
OpIdx == DataIdx) {
6382 if (VDstIdx != -1 &&
6383 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6386 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6387 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6388 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6393 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6394 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6395 RI.isSGPRReg(MRI, MO.
getReg()))
6398 if (ST.hasFlatScratchHiInB64InstHazard() &&
6405 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6426 constexpr unsigned NumOps = 3;
6427 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6428 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6429 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6430 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6435 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6438 MO = &
MI.getOperand(SrcIdx);
6441 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6445 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6449 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6453 return !OpSel && !OpSelHi;
6462 int64_t RegClass = getOpRegClassID(OpInfo);
6464 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6473 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6474 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6478 if (!LiteralLimit--)
6488 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6496 if (--ConstantBusLimit <= 0)
6508 if (!LiteralLimit--)
6510 if (--ConstantBusLimit <= 0)
6516 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6520 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6522 !
Op.isIdenticalTo(*MO))
6532 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6546 bool Is64BitOp = Is64BitFPOp ||
6553 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6562 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6580 bool IsGFX950Only = ST.hasGFX950Insts();
6581 bool IsGFX940Only = ST.hasGFX940Insts();
6583 if (!IsGFX950Only && !IsGFX940Only)
6601 unsigned Opcode =
MI.getOpcode();
6603 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6604 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6605 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6606 case AMDGPU::V_MQSAD_U32_U8_e64:
6607 case AMDGPU::V_PK_ADD_F16:
6608 case AMDGPU::V_PK_ADD_F32:
6609 case AMDGPU::V_PK_ADD_I16:
6610 case AMDGPU::V_PK_ADD_U16:
6611 case AMDGPU::V_PK_ASHRREV_I16:
6612 case AMDGPU::V_PK_FMA_F16:
6613 case AMDGPU::V_PK_FMA_F32:
6614 case AMDGPU::V_PK_FMAC_F16_e32:
6615 case AMDGPU::V_PK_FMAC_F16_e64:
6616 case AMDGPU::V_PK_LSHLREV_B16:
6617 case AMDGPU::V_PK_LSHRREV_B16:
6618 case AMDGPU::V_PK_MAD_I16:
6619 case AMDGPU::V_PK_MAD_U16:
6620 case AMDGPU::V_PK_MAX_F16:
6621 case AMDGPU::V_PK_MAX_I16:
6622 case AMDGPU::V_PK_MAX_U16:
6623 case AMDGPU::V_PK_MIN_F16:
6624 case AMDGPU::V_PK_MIN_I16:
6625 case AMDGPU::V_PK_MIN_U16:
6626 case AMDGPU::V_PK_MOV_B32:
6627 case AMDGPU::V_PK_MUL_F16:
6628 case AMDGPU::V_PK_MUL_F32:
6629 case AMDGPU::V_PK_MUL_LO_U16:
6630 case AMDGPU::V_PK_SUB_I16:
6631 case AMDGPU::V_PK_SUB_U16:
6632 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6641 unsigned Opc =
MI.getOpcode();
6644 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6647 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6653 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6654 RI.isSGPRReg(MRI, Src0.
getReg()))
6660 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6662 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6668 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6679 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6680 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6681 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6693 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6694 RI.isVGPR(MRI, Src1.
getReg())) {
6707 if (HasImplicitSGPR || !
MI.isCommutable()) {
6724 if (CommutedOpc == -1) {
6729 MI.setDesc(
get(CommutedOpc));
6733 bool Src0Kill = Src0.
isKill();
6737 else if (Src1.
isReg()) {
6752 unsigned Opc =
MI.getOpcode();
6755 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6756 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6757 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6760 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6761 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6762 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6763 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6764 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6765 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6766 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6776 if (VOP3Idx[2] != -1) {
6788 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6789 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6791 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6793 SGPRsUsed.
insert(SGPRReg);
6797 for (
int Idx : VOP3Idx) {
6806 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6818 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6825 if (ConstantBusLimit > 0) {
6837 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6838 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6844 for (
unsigned I = 0;
I < 3; ++
I) {
6857 SRC = RI.getCommonSubClass(SRC, DstRC);
6860 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6862 if (RI.hasAGPRs(VRC)) {
6863 VRC = RI.getEquivalentVGPRClass(VRC);
6866 get(TargetOpcode::COPY), NewSrcReg)
6873 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6879 for (
unsigned i = 0; i < SubRegs; ++i) {
6882 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6883 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6889 get(AMDGPU::REG_SEQUENCE), DstReg);
6890 for (
unsigned i = 0; i < SubRegs; ++i) {
6892 MIB.
addImm(RI.getSubRegFromChannel(i));
6905 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6907 SBase->setReg(SGPR);
6910 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6918 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6919 if (OldSAddrIdx < 0)
6932 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6935 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6936 if (NewVAddrIdx < 0)
6939 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6943 if (OldVAddrIdx >= 0) {
6957 if (OldVAddrIdx == NewVAddrIdx) {
6968 assert(OldSAddrIdx == NewVAddrIdx);
6970 if (OldVAddrIdx >= 0) {
6971 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
6972 AMDGPU::OpName::vdst_in);
6976 if (NewVDstIn != -1) {
6977 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
6983 if (NewVDstIn != -1) {
6984 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7025 unsigned OpSubReg =
Op.getSubReg();
7028 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7044 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7047 bool ImpDef = Def->isImplicitDef();
7048 while (!ImpDef && Def && Def->isCopy()) {
7049 if (Def->getOperand(1).getReg().isPhysical())
7052 ImpDef = Def && Def->isImplicitDef();
7054 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7073 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7079 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7080 unsigned NumSubRegs =
RegSize / 32;
7081 Register VScalarOp = ScalarOp->getReg();
7083 if (NumSubRegs == 1) {
7086 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7091 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7097 CondReg = NewCondReg;
7107 ScalarOp->setReg(CurReg);
7108 ScalarOp->setIsKill();
7112 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7113 "Unhandled register size");
7115 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7122 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7123 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7126 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7127 .
addReg(VScalarOp, VScalarOpUndef,
7128 TRI->getSubRegFromChannel(Idx + 1));
7135 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7142 auto Cmp =
BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U64_e64),
7145 if (NumSubRegs <= 2)
7146 Cmp.addReg(VScalarOp);
7148 Cmp.addReg(VScalarOp, VScalarOpUndef,
7149 TRI->getSubRegFromChannel(Idx, 2));
7153 CondReg = NewCondReg;
7163 const auto *SScalarOpRC =
7169 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7170 unsigned Channel = 0;
7171 for (
Register Piece : ReadlanePieces) {
7172 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7176 ScalarOp->setReg(SScalarOp);
7177 ScalarOp->setIsKill();
7213 if (!Begin.isValid())
7215 if (!End.isValid()) {
7221 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7229 MBB.computeRegisterLiveness(
TRI, AMDGPU::SCC,
MI,
7230 std::numeric_limits<unsigned>::max()) !=
7248 for (
auto I = Begin;
I != AfterMI;
I++) {
7249 for (
auto &MO :
I->all_uses())
7275 MBB.addSuccessor(LoopBB);
7285 for (
auto &Succ : RemainderBB->
successors()) {
7309static std::tuple<unsigned, unsigned>
7317 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7318 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7325 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7342 .
addImm(AMDGPU::sub0_sub1)
7348 return std::tuple(RsrcPtr, NewSRsrc);
7385 if (
MI.getOpcode() == AMDGPU::PHI) {
7387 assert(!RI.isSGPRClass(VRC));
7390 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7392 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7408 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7411 if (RI.hasVGPRs(DstRC)) {
7415 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7417 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7435 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7440 if (DstRC != Src0RC) {
7449 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7451 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7457 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7458 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7459 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7460 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7461 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7462 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7463 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7465 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7478 ? AMDGPU::OpName::rsrc
7479 : AMDGPU::OpName::srsrc;
7484 AMDGPU::OpName SampOpName =
7485 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7494 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7500 unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
7501 unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
7506 while (Start->getOpcode() != FrameSetupOpcode)
7509 while (End->getOpcode() != FrameDestroyOpcode)
7513 while (End !=
MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
7514 MI.definesRegister(End->getOperand(1).getReg(),
nullptr))
7522 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7526 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7536 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7537 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7538 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7539 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7541 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7548 bool isSoffsetLegal =
true;
7550 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7551 if (SoffsetIdx != -1) {
7555 isSoffsetLegal =
false;
7559 bool isRsrcLegal =
true;
7561 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7562 if (RsrcIdx != -1) {
7564 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7565 isRsrcLegal =
false;
7569 if (isRsrcLegal && isSoffsetLegal)
7597 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7601 unsigned RsrcPtr, NewSRsrc;
7608 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7609 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7615 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7616 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7629 }
else if (!VAddr && ST.hasAddr64()) {
7633 "FIXME: Need to emit flat atomics here");
7635 unsigned RsrcPtr, NewSRsrc;
7661 MIB.
addImm(CPol->getImm());
7666 MIB.
addImm(TFE->getImm());
7686 MI.removeFromParent();
7691 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7692 .addImm(AMDGPU::sub0)
7693 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7694 .addImm(AMDGPU::sub1);
7697 if (!isSoffsetLegal) {
7708 if (!isSoffsetLegal) {
7720 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7721 if (RsrcIdx != -1) {
7722 DeferredList.insert(
MI);
7727 return DeferredList.contains(
MI);
7737 if (!ST.useRealTrue16Insts())
7740 unsigned Opcode =
MI.getOpcode();
7744 OpIdx >=
get(Opcode).getNumOperands() ||
7745 get(Opcode).operands()[
OpIdx].RegClass == -1)
7749 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7753 if (!RI.isVGPRClass(CurrRC))
7756 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7758 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7759 Op.setSubReg(AMDGPU::lo16);
7760 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7770 Op.setReg(NewDstReg);
7782 while (!Worklist.
empty()) {
7796 "Deferred MachineInstr are not supposed to re-populate worklist");
7816 case AMDGPU::S_ADD_I32:
7817 case AMDGPU::S_SUB_I32: {
7821 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7829 case AMDGPU::S_MUL_U64:
7830 if (ST.hasVectorMulU64()) {
7831 NewOpcode = AMDGPU::V_MUL_U64_e64;
7835 splitScalarSMulU64(Worklist, Inst, MDT);
7839 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7840 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7843 splitScalarSMulPseudo(Worklist, Inst, MDT);
7847 case AMDGPU::S_AND_B64:
7848 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7852 case AMDGPU::S_OR_B64:
7853 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
7857 case AMDGPU::S_XOR_B64:
7858 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
7862 case AMDGPU::S_NAND_B64:
7863 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
7867 case AMDGPU::S_NOR_B64:
7868 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
7872 case AMDGPU::S_XNOR_B64:
7873 if (ST.hasDLInsts())
7874 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
7876 splitScalar64BitXnor(Worklist, Inst, MDT);
7880 case AMDGPU::S_ANDN2_B64:
7881 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
7885 case AMDGPU::S_ORN2_B64:
7886 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
7890 case AMDGPU::S_BREV_B64:
7891 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
7895 case AMDGPU::S_NOT_B64:
7896 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
7900 case AMDGPU::S_BCNT1_I32_B64:
7901 splitScalar64BitBCNT(Worklist, Inst);
7905 case AMDGPU::S_BFE_I64:
7906 splitScalar64BitBFE(Worklist, Inst);
7910 case AMDGPU::S_FLBIT_I32_B64:
7911 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
7914 case AMDGPU::S_FF1_I32_B64:
7915 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
7919 case AMDGPU::S_LSHL_B32:
7920 if (ST.hasOnlyRevVALUShifts()) {
7921 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
7925 case AMDGPU::S_ASHR_I32:
7926 if (ST.hasOnlyRevVALUShifts()) {
7927 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
7931 case AMDGPU::S_LSHR_B32:
7932 if (ST.hasOnlyRevVALUShifts()) {
7933 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
7937 case AMDGPU::S_LSHL_B64:
7938 if (ST.hasOnlyRevVALUShifts()) {
7940 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
7941 : AMDGPU::V_LSHLREV_B64_e64;
7945 case AMDGPU::S_ASHR_I64:
7946 if (ST.hasOnlyRevVALUShifts()) {
7947 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
7951 case AMDGPU::S_LSHR_B64:
7952 if (ST.hasOnlyRevVALUShifts()) {
7953 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
7958 case AMDGPU::S_ABS_I32:
7959 lowerScalarAbs(Worklist, Inst);
7963 case AMDGPU::S_ABSDIFF_I32:
7964 lowerScalarAbsDiff(Worklist, Inst);
7968 case AMDGPU::S_CBRANCH_SCC0:
7969 case AMDGPU::S_CBRANCH_SCC1: {
7972 bool IsSCC = CondReg == AMDGPU::SCC;
7980 case AMDGPU::S_BFE_U64:
7981 case AMDGPU::S_BFM_B64:
7984 case AMDGPU::S_PACK_LL_B32_B16:
7985 case AMDGPU::S_PACK_LH_B32_B16:
7986 case AMDGPU::S_PACK_HL_B32_B16:
7987 case AMDGPU::S_PACK_HH_B32_B16:
7988 movePackToVALU(Worklist, MRI, Inst);
7992 case AMDGPU::S_XNOR_B32:
7993 lowerScalarXnor(Worklist, Inst);
7997 case AMDGPU::S_NAND_B32:
7998 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8002 case AMDGPU::S_NOR_B32:
8003 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8007 case AMDGPU::S_ANDN2_B32:
8008 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8012 case AMDGPU::S_ORN2_B32:
8013 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8021 case AMDGPU::S_ADD_CO_PSEUDO:
8022 case AMDGPU::S_SUB_CO_PSEUDO: {
8023 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8024 ? AMDGPU::V_ADDC_U32_e64
8025 : AMDGPU::V_SUBB_U32_e64;
8026 const auto *CarryRC = RI.getWaveMaskRegClass();
8048 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8052 case AMDGPU::S_UADDO_PSEUDO:
8053 case AMDGPU::S_USUBO_PSEUDO: {
8059 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8060 ? AMDGPU::V_ADD_CO_U32_e64
8061 : AMDGPU::V_SUB_CO_U32_e64;
8073 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8077 case AMDGPU::S_LSHL1_ADD_U32:
8078 case AMDGPU::S_LSHL2_ADD_U32:
8079 case AMDGPU::S_LSHL3_ADD_U32:
8080 case AMDGPU::S_LSHL4_ADD_U32: {
8084 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8085 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8086 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8100 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8104 case AMDGPU::S_CSELECT_B32:
8105 case AMDGPU::S_CSELECT_B64:
8106 lowerSelect(Worklist, Inst, MDT);
8109 case AMDGPU::S_CMP_EQ_I32:
8110 case AMDGPU::S_CMP_LG_I32:
8111 case AMDGPU::S_CMP_GT_I32:
8112 case AMDGPU::S_CMP_GE_I32:
8113 case AMDGPU::S_CMP_LT_I32:
8114 case AMDGPU::S_CMP_LE_I32:
8115 case AMDGPU::S_CMP_EQ_U32:
8116 case AMDGPU::S_CMP_LG_U32:
8117 case AMDGPU::S_CMP_GT_U32:
8118 case AMDGPU::S_CMP_GE_U32:
8119 case AMDGPU::S_CMP_LT_U32:
8120 case AMDGPU::S_CMP_LE_U32:
8121 case AMDGPU::S_CMP_EQ_U64:
8122 case AMDGPU::S_CMP_LG_U64:
8123 case AMDGPU::S_CMP_LT_F32:
8124 case AMDGPU::S_CMP_EQ_F32:
8125 case AMDGPU::S_CMP_LE_F32:
8126 case AMDGPU::S_CMP_GT_F32:
8127 case AMDGPU::S_CMP_LG_F32:
8128 case AMDGPU::S_CMP_GE_F32:
8129 case AMDGPU::S_CMP_O_F32:
8130 case AMDGPU::S_CMP_U_F32:
8131 case AMDGPU::S_CMP_NGE_F32:
8132 case AMDGPU::S_CMP_NLG_F32:
8133 case AMDGPU::S_CMP_NGT_F32:
8134 case AMDGPU::S_CMP_NLE_F32:
8135 case AMDGPU::S_CMP_NEQ_F32:
8136 case AMDGPU::S_CMP_NLT_F32: {
8141 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8155 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8159 case AMDGPU::S_CMP_LT_F16:
8160 case AMDGPU::S_CMP_EQ_F16:
8161 case AMDGPU::S_CMP_LE_F16:
8162 case AMDGPU::S_CMP_GT_F16:
8163 case AMDGPU::S_CMP_LG_F16:
8164 case AMDGPU::S_CMP_GE_F16:
8165 case AMDGPU::S_CMP_O_F16:
8166 case AMDGPU::S_CMP_U_F16:
8167 case AMDGPU::S_CMP_NGE_F16:
8168 case AMDGPU::S_CMP_NLG_F16:
8169 case AMDGPU::S_CMP_NGT_F16:
8170 case AMDGPU::S_CMP_NLE_F16:
8171 case AMDGPU::S_CMP_NEQ_F16:
8172 case AMDGPU::S_CMP_NLT_F16: {
8195 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8199 case AMDGPU::S_CVT_HI_F32_F16: {
8202 if (ST.useRealTrue16Insts()) {
8207 .
addReg(TmpReg, {}, AMDGPU::hi16)
8223 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8227 case AMDGPU::S_MINIMUM_F32:
8228 case AMDGPU::S_MAXIMUM_F32: {
8240 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8244 case AMDGPU::S_MINIMUM_F16:
8245 case AMDGPU::S_MAXIMUM_F16: {
8247 ? &AMDGPU::VGPR_16RegClass
8248 : &AMDGPU::VGPR_32RegClass);
8260 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8264 case AMDGPU::V_S_EXP_F16_e64:
8265 case AMDGPU::V_S_LOG_F16_e64:
8266 case AMDGPU::V_S_RCP_F16_e64:
8267 case AMDGPU::V_S_RSQ_F16_e64:
8268 case AMDGPU::V_S_SQRT_F16_e64: {
8270 ? &AMDGPU::VGPR_16RegClass
8271 : &AMDGPU::VGPR_32RegClass);
8283 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8289 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8297 if (NewOpcode == Opcode) {
8307 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
8321 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8328 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8359 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8363 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8369 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8376 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8378 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8383 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8391 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8401 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8402 AMDGPU::OpName::src0_modifiers) >= 0)
8406 NewInstr->addOperand(Src);
8409 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8412 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8414 NewInstr.addImm(
Size);
8415 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8419 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8424 "Scalar BFE is only implemented for constant width and offset");
8432 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8433 AMDGPU::OpName::src1_modifiers) >= 0)
8435 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8437 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8438 AMDGPU::OpName::src2_modifiers) >= 0)
8440 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8442 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8444 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8446 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8452 NewInstr->addOperand(
Op);
8459 if (
Op.getReg() == AMDGPU::SCC) {
8461 if (
Op.isDef() && !
Op.isDead())
8462 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8464 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8469 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8470 Register DstReg = NewInstr->getOperand(0).getReg();
8485 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8489std::pair<bool, MachineBasicBlock *>
8492 if (ST.hasAddNoCarryInsts()) {
8504 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8506 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8507 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8518 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8519 return std::pair(
true, NewBB);
8522 return std::pair(
false,
nullptr);
8539 bool IsSCC = (CondReg == AMDGPU::SCC);
8553 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8558 bool CopyFound =
false;
8559 for (MachineInstr &CandI :
8562 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8564 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8566 .
addReg(CandI.getOperand(1).getReg());
8578 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8587 MachineInstr *NewInst;
8588 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8589 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8604 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8619 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8620 : AMDGPU::V_SUB_CO_U32_e32;
8631 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8648 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8649 : AMDGPU::V_SUB_CO_U32_e32;
8662 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8676 if (ST.hasDLInsts()) {
8686 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8692 bool Src0IsSGPR = Src0.
isReg() &&
8694 bool Src1IsSGPR = Src1.
isReg() &&
8708 }
else if (Src1IsSGPR) {
8726 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8732 unsigned Opcode)
const {
8756 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8761 unsigned Opcode)
const {
8785 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8800 const MCInstrDesc &InstDesc =
get(Opcode);
8801 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8803 &AMDGPU::SGPR_32RegClass;
8805 const TargetRegisterClass *Src0SubRC =
8806 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8809 AMDGPU::sub0, Src0SubRC);
8812 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8813 const TargetRegisterClass *NewDestSubRC =
8814 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8817 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8820 AMDGPU::sub1, Src0SubRC);
8823 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8837 Worklist.
insert(&LoHalf);
8838 Worklist.
insert(&HiHalf);
8844 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8867 const TargetRegisterClass *Src0SubRC =
8868 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8869 if (RI.isSGPRClass(Src0SubRC))
8870 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8871 const TargetRegisterClass *Src1SubRC =
8872 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8873 if (RI.isSGPRClass(Src1SubRC))
8874 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8878 MachineOperand Op0L =
8880 MachineOperand Op1L =
8882 MachineOperand Op0H =
8884 MachineOperand Op1H =
8903 MachineInstr *Op1L_Op0H =
8909 MachineInstr *Op1H_Op0L =
8915 MachineInstr *Carry =
8920 MachineInstr *LoHalf =
8930 MachineInstr *HiHalf =
8953 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
8976 const TargetRegisterClass *Src0SubRC =
8977 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8978 if (RI.isSGPRClass(Src0SubRC))
8979 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
8980 const TargetRegisterClass *Src1SubRC =
8981 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
8982 if (RI.isSGPRClass(Src1SubRC))
8983 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
8987 MachineOperand Op0L =
8989 MachineOperand Op1L =
8993 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
8994 ? AMDGPU::V_MUL_HI_U32_e64
8995 : AMDGPU::V_MUL_HI_I32_e64;
8996 MachineInstr *HiHalf =
8999 MachineInstr *LoHalf =
9018 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9034 const MCInstrDesc &InstDesc =
get(Opcode);
9035 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9037 &AMDGPU::SGPR_32RegClass;
9039 const TargetRegisterClass *Src0SubRC =
9040 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9041 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9043 &AMDGPU::SGPR_32RegClass;
9045 const TargetRegisterClass *Src1SubRC =
9046 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9049 AMDGPU::sub0, Src0SubRC);
9051 AMDGPU::sub0, Src1SubRC);
9053 AMDGPU::sub1, Src0SubRC);
9055 AMDGPU::sub1, Src1SubRC);
9058 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9059 const TargetRegisterClass *NewDestSubRC =
9060 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9063 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9068 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9081 Worklist.
insert(&LoHalf);
9082 Worklist.
insert(&HiHalf);
9085 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9105 MachineOperand* Op0;
9106 MachineOperand* Op1;
9108 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9141 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9142 const TargetRegisterClass *SrcRC = Src.isReg() ?
9144 &AMDGPU::SGPR_32RegClass;
9149 const TargetRegisterClass *SrcSubRC =
9150 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9153 AMDGPU::sub0, SrcSubRC);
9155 AMDGPU::sub1, SrcSubRC);
9165 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9184 Offset == 0 &&
"Not implemented");
9207 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9217 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9220 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9226 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9245 const MCInstrDesc &InstDesc =
get(Opcode);
9247 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9248 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9249 : AMDGPU::V_ADD_CO_U32_e32;
9251 const TargetRegisterClass *SrcRC =
9252 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9253 const TargetRegisterClass *SrcSubRC =
9254 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9256 MachineOperand SrcRegSub0 =
9258 MachineOperand SrcRegSub1 =
9271 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9277 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9281 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9284void SIInstrInfo::addUsersToMoveToVALUWorklist(
9288 MachineInstr &
UseMI = *MO.getParent();
9292 switch (
UseMI.getOpcode()) {
9295 case AMDGPU::SOFT_WQM:
9296 case AMDGPU::STRICT_WWM:
9297 case AMDGPU::STRICT_WQM:
9298 case AMDGPU::REG_SEQUENCE:
9300 case AMDGPU::INSERT_SUBREG:
9303 OpNo = MO.getOperandNo();
9310 if (!RI.hasVectorRegisters(OpRC))
9327 if (ST.useRealTrue16Insts()) {
9329 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9332 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9338 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9341 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9350 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9352 case AMDGPU::S_PACK_LL_B32_B16:
9354 .addReg(SrcReg0, {},
9355 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9356 .addImm(AMDGPU::lo16)
9357 .addReg(SrcReg1, {},
9358 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9359 .addImm(AMDGPU::hi16);
9361 case AMDGPU::S_PACK_LH_B32_B16:
9363 .addReg(SrcReg0, {},
9364 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9365 .addImm(AMDGPU::lo16)
9366 .addReg(SrcReg1, {}, AMDGPU::hi16)
9367 .addImm(AMDGPU::hi16);
9369 case AMDGPU::S_PACK_HL_B32_B16:
9370 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9371 .addImm(AMDGPU::lo16)
9372 .addReg(SrcReg1, {},
9373 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9374 .addImm(AMDGPU::hi16);
9376 case AMDGPU::S_PACK_HH_B32_B16:
9377 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9378 .addImm(AMDGPU::lo16)
9379 .addReg(SrcReg1, {}, AMDGPU::hi16)
9380 .addImm(AMDGPU::hi16);
9388 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9393 case AMDGPU::S_PACK_LL_B32_B16: {
9412 case AMDGPU::S_PACK_LH_B32_B16: {
9422 case AMDGPU::S_PACK_HL_B32_B16: {
9433 case AMDGPU::S_PACK_HH_B32_B16: {
9453 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9462 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9463 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9464 SmallVector<MachineInstr *, 4> CopyToDelete;
9467 for (MachineInstr &
MI :
9471 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9474 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9475 Register DestReg =
MI.getOperand(0).getReg();
9482 MI.getOperand(SCCIdx).setReg(NewCond);
9488 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9491 for (
auto &Copy : CopyToDelete)
9492 Copy->eraseFromParent();
9500void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9506 for (MachineInstr &
MI :
9509 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9511 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9520 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9528 case AMDGPU::REG_SEQUENCE:
9529 case AMDGPU::INSERT_SUBREG:
9531 case AMDGPU::SOFT_WQM:
9532 case AMDGPU::STRICT_WWM:
9533 case AMDGPU::STRICT_WQM: {
9535 if (RI.isAGPRClass(SrcRC)) {
9536 if (RI.isAGPRClass(NewDstRC))
9541 case AMDGPU::REG_SEQUENCE:
9542 case AMDGPU::INSERT_SUBREG:
9543 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9546 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9552 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9555 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9569 int OpIndices[3])
const {
9570 const MCInstrDesc &
Desc =
MI.getDesc();
9586 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9588 for (
unsigned i = 0; i < 3; ++i) {
9589 int Idx = OpIndices[i];
9593 const MachineOperand &MO =
MI.getOperand(Idx);
9599 const TargetRegisterClass *OpRC =
9600 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9601 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9608 if (RI.isSGPRClass(RegRC))
9626 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9627 SGPRReg = UsedSGPRs[0];
9630 if (!SGPRReg && UsedSGPRs[1]) {
9631 if (UsedSGPRs[1] == UsedSGPRs[2])
9632 SGPRReg = UsedSGPRs[1];
9639 AMDGPU::OpName OperandName)
const {
9640 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9643 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9647 return &
MI.getOperand(Idx);
9661 if (ST.isAmdHsaOS()) {
9664 RsrcDataFormat |= (1ULL << 56);
9669 RsrcDataFormat |= (2ULL << 59);
9672 return RsrcDataFormat;
9682 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9687 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9694 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9700 unsigned Opc =
MI.getOpcode();
9706 return get(
Opc).mayLoad() &&
9711 int &FrameIndex)
const {
9713 if (!Addr || !Addr->
isFI())
9724 int &FrameIndex)
const {
9732 int &FrameIndex)
const {
9746 int &FrameIndex)
const {
9763 while (++
I != E &&
I->isInsideBundle()) {
9764 assert(!
I->isBundle() &&
"No nested bundle!");
9772 unsigned Opc =
MI.getOpcode();
9774 unsigned DescSize =
Desc.getSize();
9779 unsigned Size = DescSize;
9783 if (
MI.isBranch() && ST.hasOffset3fBug())
9794 bool HasLiteral =
false;
9795 unsigned LiteralSize = 4;
9796 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9801 if (ST.has64BitLiterals()) {
9802 switch (OpInfo.OperandType) {
9825 return HasLiteral ? DescSize + LiteralSize : DescSize;
9830 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9834 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9835 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9839 case TargetOpcode::BUNDLE:
9841 case TargetOpcode::INLINEASM:
9842 case TargetOpcode::INLINEASM_BR: {
9844 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9848 if (
MI.isMetaInstruction())
9852 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
9855 unsigned LoInstOpcode = D16Info->LoOp;
9857 DescSize =
Desc.getSize();
9861 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
9864 DescSize =
Desc.getSize();
9875 if (
MI.memoperands_empty())
9887 static const std::pair<int, const char *> TargetIndices[] = {
9926std::pair<unsigned, unsigned>
9933 static const std::pair<unsigned, const char *> TargetFlags[] = {
9951 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9967 return AMDGPU::WWM_COPY;
9969 return AMDGPU::COPY;
9986 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
9990 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
9991 return IsLRSplitInst;
10004 bool IsNullOrVectorRegister =
true;
10008 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10011 return IsNullOrVectorRegister &&
10013 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10014 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10022 if (ST.hasAddNoCarryInsts())
10038 if (ST.hasAddNoCarryInsts())
10042 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10044 : RS.scavengeRegisterBackwards(
10045 *RI.getBoolRC(),
I,
false,
10058 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10059 case AMDGPU::SI_KILL_I1_TERMINATOR:
10068 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10069 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10070 case AMDGPU::SI_KILL_I1_PSEUDO:
10071 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10083 const unsigned OffsetBits =
10085 return (1 << OffsetBits) - 1;
10089 if (!ST.isWave32())
10092 if (
MI.isInlineAsm())
10095 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10098 for (
auto &
Op :
MI.implicit_operands()) {
10099 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10100 Op.setReg(AMDGPU::VCC_LO);
10109 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10113 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10114 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10130 if (Imm > MaxImm) {
10131 if (Imm <= MaxImm + 64) {
10133 Overflow = Imm - MaxImm;
10152 if (Overflow > 0) {
10160 if (ST.hasRestrictedSOffset())
10165 SOffset = Overflow;
10203 if (!ST.hasFlatInstOffsets())
10211 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10223std::pair<int64_t, int64_t>
10226 int64_t RemainderOffset = COffsetVal;
10227 int64_t ImmField = 0;
10232 if (AllowNegative) {
10234 int64_t
D = 1LL << NumBits;
10235 RemainderOffset = (COffsetVal /
D) *
D;
10236 ImmField = COffsetVal - RemainderOffset;
10238 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10240 (ImmField % 4) != 0) {
10242 RemainderOffset += ImmField % 4;
10243 ImmField -= ImmField % 4;
10245 }
else if (COffsetVal >= 0) {
10247 RemainderOffset = COffsetVal - ImmField;
10251 assert(RemainderOffset + ImmField == COffsetVal);
10252 return {ImmField, RemainderOffset};
10256 if (ST.hasNegativeScratchOffsetBug() &&
10264 switch (ST.getGeneration()) {
10293 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10294 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10295 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10296 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10297 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10298 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10299 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10300 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10307#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10308 case OPCODE##_dpp: \
10309 case OPCODE##_e32: \
10310 case OPCODE##_e64: \
10311 case OPCODE##_e64_dpp: \
10312 case OPCODE##_sdwa:
10326 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10327 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10328 case AMDGPU::V_FMA_F16_gfx9_e64:
10329 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10330 case AMDGPU::V_INTERP_P2_F16:
10331 case AMDGPU::V_MAD_F16_e64:
10332 case AMDGPU::V_MAD_U16_e64:
10333 case AMDGPU::V_MAD_I16_e64:
10342 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10356 switch (ST.getGeneration()) {
10369 if (
isMAI(Opcode)) {
10377 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10380 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10387 if (ST.hasGFX90AInsts()) {
10388 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10389 if (ST.hasGFX940Insts())
10391 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10393 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10395 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10401 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10420 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10421 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10422 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10434 switch (
MI.getOpcode()) {
10436 case AMDGPU::REG_SEQUENCE:
10440 case AMDGPU::INSERT_SUBREG:
10441 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10458 if (!
P.Reg.isVirtual())
10463 while (
auto *
MI = DefInst) {
10465 switch (
MI->getOpcode()) {
10467 case AMDGPU::V_MOV_B32_e32: {
10468 auto &Op1 =
MI->getOperand(1);
10497 auto *DefBB =
DefMI.getParent();
10501 if (
UseMI.getParent() != DefBB)
10504 const int MaxInstScan = 20;
10508 auto E =
UseMI.getIterator();
10509 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10510 if (
I->isDebugInstr())
10513 if (++NumInst > MaxInstScan)
10516 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10529 auto *DefBB =
DefMI.getParent();
10531 const int MaxUseScan = 10;
10535 auto &UseInst = *
Use.getParent();
10538 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10541 if (++NumUse > MaxUseScan)
10548 const int MaxInstScan = 20;
10552 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10555 if (
I->isDebugInstr())
10558 if (++NumInst > MaxInstScan)
10571 if (Reg == VReg && --NumUse == 0)
10573 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10582 auto Cur =
MBB.begin();
10583 if (Cur !=
MBB.end())
10585 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10588 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10597 if (InsPt !=
MBB.end() &&
10598 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10599 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10600 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10601 InsPt->definesRegister(Src,
nullptr)) {
10605 .
addReg(Src, {}, SrcSubReg)
10630 if (isFullCopyInstr(
MI)) {
10631 Register DstReg =
MI.getOperand(0).getReg();
10632 Register SrcReg =
MI.getOperand(1).getReg();
10654 unsigned *PredCost)
const {
10655 if (
MI.isBundle()) {
10658 unsigned Lat = 0,
Count = 0;
10659 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10661 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10663 return Lat +
Count - 1;
10666 return SchedModel.computeInstrLatency(&
MI);
10673 return *CallAddrOp;
10680 unsigned Opcode =
MI.getOpcode();
10682 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10685 :
MI.getOperand(1).getReg();
10689 unsigned SrcAS = SrcTy.getAddressSpace();
10692 ST.hasGloballyAddressableScratch()
10700 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10701 return HandleAddrSpaceCast(
MI);
10704 auto IID = GI->getIntrinsicID();
10711 case Intrinsic::amdgcn_addrspacecast_nonnull:
10712 return HandleAddrSpaceCast(
MI);
10713 case Intrinsic::amdgcn_if:
10714 case Intrinsic::amdgcn_else:
10728 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10729 Opcode == AMDGPU::G_SEXTLOAD) {
10730 if (
MI.memoperands_empty())
10734 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10735 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10743 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10744 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10745 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10754 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10755 return Formatter.get();
10764 unsigned opcode =
MI.getOpcode();
10765 if (opcode == AMDGPU::V_READLANE_B32 ||
10766 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10767 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10770 if (isCopyInstr(
MI)) {
10774 RI.getPhysRegBaseClass(srcOp.
getReg());
10782 if (
MI.isPreISelOpcode())
10797 if (
MI.memoperands_empty())
10801 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10802 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10817 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10819 if (!
SrcOp.isReg())
10823 if (!Reg || !
SrcOp.readsReg())
10829 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
10856 F,
"ds_ordered_count unsupported for this calling conv"));
10870 Register &SrcReg2, int64_t &CmpMask,
10871 int64_t &CmpValue)
const {
10872 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
10875 switch (
MI.getOpcode()) {
10878 case AMDGPU::S_CMP_EQ_U32:
10879 case AMDGPU::S_CMP_EQ_I32:
10880 case AMDGPU::S_CMP_LG_U32:
10881 case AMDGPU::S_CMP_LG_I32:
10882 case AMDGPU::S_CMP_LT_U32:
10883 case AMDGPU::S_CMP_LT_I32:
10884 case AMDGPU::S_CMP_GT_U32:
10885 case AMDGPU::S_CMP_GT_I32:
10886 case AMDGPU::S_CMP_LE_U32:
10887 case AMDGPU::S_CMP_LE_I32:
10888 case AMDGPU::S_CMP_GE_U32:
10889 case AMDGPU::S_CMP_GE_I32:
10890 case AMDGPU::S_CMP_EQ_U64:
10891 case AMDGPU::S_CMP_LG_U64:
10892 SrcReg =
MI.getOperand(0).getReg();
10893 if (
MI.getOperand(1).isReg()) {
10894 if (
MI.getOperand(1).getSubReg())
10896 SrcReg2 =
MI.getOperand(1).getReg();
10898 }
else if (
MI.getOperand(1).isImm()) {
10900 CmpValue =
MI.getOperand(1).getImm();
10906 case AMDGPU::S_CMPK_EQ_U32:
10907 case AMDGPU::S_CMPK_EQ_I32:
10908 case AMDGPU::S_CMPK_LG_U32:
10909 case AMDGPU::S_CMPK_LG_I32:
10910 case AMDGPU::S_CMPK_LT_U32:
10911 case AMDGPU::S_CMPK_LT_I32:
10912 case AMDGPU::S_CMPK_GT_U32:
10913 case AMDGPU::S_CMPK_GT_I32:
10914 case AMDGPU::S_CMPK_LE_U32:
10915 case AMDGPU::S_CMPK_LE_I32:
10916 case AMDGPU::S_CMPK_GE_U32:
10917 case AMDGPU::S_CMPK_GE_I32:
10918 SrcReg =
MI.getOperand(0).getReg();
10920 CmpValue =
MI.getOperand(1).getImm();
10930 if (S->isLiveIn(AMDGPU::SCC))
10939bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
10942 bool SCCIsDead =
false;
10945 constexpr unsigned ScanLimit = 12;
10946 unsigned Count = 0;
10947 for (MachineInstr &
MI :
10949 if (++
Count > ScanLimit)
10951 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
10952 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
10953 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
10954 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10955 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
10960 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
10973 for (MachineInstr *
MI : InvertInstr) {
10974 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
10975 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
10977 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
10978 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
10979 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
10980 ? AMDGPU::S_CBRANCH_SCC1
10981 : AMDGPU::S_CBRANCH_SCC0));
10994 bool NeedInversion)
const {
10995 MachineInstr *KillsSCC =
nullptr;
11000 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11002 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11005 if (NeedInversion && !invertSCCUse(SCCRedefine))
11007 if (MachineOperand *SccDef =
11009 SccDef->setIsDead(
false);
11017 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11018 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11020 bool Op1IsNonZeroImm =
11021 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11022 bool Op2IsZeroImm =
11023 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11024 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11030 unsigned &NewDefOpc) {
11033 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11034 Def.getOpcode() != AMDGPU::S_ADD_U32)
11040 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11046 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11048 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11051 NewDefOpc = AMDGPU::S_ADD_U32;
11053 NeedInversion = !NeedInversion;
11058 Register SrcReg2, int64_t CmpMask,
11067 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11068 this](
bool NeedInversion) ->
bool {
11092 unsigned NewDefOpc = Def->getOpcode();
11098 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11101 if (NewDefOpc != Def->getOpcode())
11102 Def->setDesc(
get(NewDefOpc));
11111 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11118 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11126 optimizeSCC(
Select, Def,
false);
11133 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11134 this](int64_t ExpectedValue,
unsigned SrcSize,
11135 bool IsReversible,
bool IsSigned) ->
bool {
11163 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11164 Def->getOpcode() != AMDGPU::S_AND_B64)
11168 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11179 SrcOp = &Def->getOperand(2);
11180 else if (isMask(&Def->getOperand(2)))
11181 SrcOp = &Def->getOperand(1);
11189 if (IsSigned && BitNo == SrcSize - 1)
11192 ExpectedValue <<= BitNo;
11194 bool IsReversedCC =
false;
11195 if (CmpValue != ExpectedValue) {
11198 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11203 Register DefReg = Def->getOperand(0).getReg();
11207 if (!optimizeSCC(Def, &CmpInstr,
false))
11218 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11219 : AMDGPU::S_BITCMP1_B32
11220 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11221 : AMDGPU::S_BITCMP1_B64;
11226 Def->eraseFromParent();
11234 case AMDGPU::S_CMP_EQ_U32:
11235 case AMDGPU::S_CMP_EQ_I32:
11236 case AMDGPU::S_CMPK_EQ_U32:
11237 case AMDGPU::S_CMPK_EQ_I32:
11238 return optimizeCmpAnd(1, 32,
true,
false) ||
11239 optimizeCmpSelect(
true);
11240 case AMDGPU::S_CMP_GE_U32:
11241 case AMDGPU::S_CMPK_GE_U32:
11242 return optimizeCmpAnd(1, 32,
false,
false);
11243 case AMDGPU::S_CMP_GE_I32:
11244 case AMDGPU::S_CMPK_GE_I32:
11245 return optimizeCmpAnd(1, 32,
false,
true);
11246 case AMDGPU::S_CMP_EQ_U64:
11247 return optimizeCmpAnd(1, 64,
true,
false);
11248 case AMDGPU::S_CMP_LG_U32:
11249 case AMDGPU::S_CMP_LG_I32:
11250 case AMDGPU::S_CMPK_LG_U32:
11251 case AMDGPU::S_CMPK_LG_I32:
11252 return optimizeCmpAnd(0, 32,
true,
false) ||
11253 optimizeCmpSelect(
false);
11254 case AMDGPU::S_CMP_GT_U32:
11255 case AMDGPU::S_CMPK_GT_U32:
11256 return optimizeCmpAnd(0, 32,
false,
false);
11257 case AMDGPU::S_CMP_GT_I32:
11258 case AMDGPU::S_CMPK_GT_I32:
11259 return optimizeCmpAnd(0, 32,
false,
true);
11260 case AMDGPU::S_CMP_LG_U64:
11261 return optimizeCmpAnd(0, 64,
true,
false) ||
11262 optimizeCmpSelect(
false);
11269 AMDGPU::OpName
OpName)
const {
11270 if (!ST.needsAlignedVGPRs())
11273 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11285 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11287 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11291 : &AMDGPU::VReg_64_Align2RegClass);
11293 .
addReg(DataReg, {},
Op.getSubReg())
11298 Op.setSubReg(AMDGPU::sub0);
11313 if (ST.hasGFX1250Insts())
11320 unsigned Opcode =
MI.getOpcode();
11326 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11327 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11330 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static MachineBasicBlock * loadScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
InstructionUniformity getGenericInstructionUniformity(const MachineInstr &MI) const
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStackAccess(const MachineInstr &MI, int &FrameIndex) const
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
InstructionUniformity getInstructionUniformity(const MachineInstr &MI) const final
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const override
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
static bool isMAI(const MCInstrDesc &Desc)
static bool usesLGKM_CNT(const MachineInstr &MI)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst) const
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination, lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subre...
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence,...
@ AlwaysUniform
The result values are always uniform.
@ NeverUniform
The result values can never be assumed to be uniform.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr RegState getUndefRegState(bool B)
GenericCycleInfo< MachineSSAContext > MachineCycleInfo
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.