34#include "llvm/IR/IntrinsicsAMDGPU.h"
41#define DEBUG_TYPE "si-instr-info"
43#define GET_INSTRINFO_CTOR_DTOR
44#include "AMDGPUGenInstrInfo.inc"
47#define GET_D16ImageDimIntrinsics_IMPL
48#define GET_ImageDimIntrinsicTable_IMPL
49#define GET_RsrcIntrinsics_IMPL
50#include "AMDGPUGenSearchableTables.inc"
58 cl::desc(
"Restrict range of branch instructions (DEBUG)"));
61 "amdgpu-fix-16-bit-physreg-copies",
62 cl::desc(
"Fix copies between 32 and 16 bit registers by extending to 32 bit"),
78 unsigned N =
Node->getNumOperands();
79 while (
N &&
Node->getOperand(
N - 1).getValueType() == MVT::Glue)
91 int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0,
OpName);
92 int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1,
OpName);
94 if (Op0Idx == -1 && Op1Idx == -1)
98 if ((Op0Idx == -1 && Op1Idx != -1) ||
99 (Op1Idx == -1 && Op0Idx != -1))
120 return !
MI.memoperands_empty() &&
122 return MMO->isLoad() && MMO->isInvariant();
144 if (!
MI.hasImplicitDef() &&
145 MI.getNumImplicitOperands() ==
MI.getDesc().implicit_uses().size() &&
146 !
MI.mayRaiseFPException())
154bool SIInstrInfo::resultDependsOnExec(
const MachineInstr &
MI)
const {
157 if (
MI.isCompare()) {
168 switch (
Use.getOpcode()) {
169 case AMDGPU::S_AND_SAVEEXEC_B32:
170 case AMDGPU::S_AND_SAVEEXEC_B64:
172 case AMDGPU::S_AND_B32:
173 case AMDGPU::S_AND_B64:
174 if (!
Use.readsRegister(AMDGPU::EXEC,
nullptr))
185 if (!
MI.isConvergent())
188 switch (
MI.getOpcode()) {
191 case AMDGPU::V_READFIRSTLANE_B32:
208 if (
MI.getOpcode() == AMDGPU::SI_IF_BREAK)
213 for (
auto Op :
MI.uses()) {
214 if (
Op.isReg() &&
Op.getReg().isVirtual() &&
220 if (FromCycle ==
nullptr)
226 while (FromCycle && !FromCycle->
contains(ToCycle)) {
246 int64_t &Offset1)
const {
254 if (!
get(Opc0).mayLoad() || !
get(Opc1).mayLoad())
258 if (!
get(Opc0).getNumDefs() || !
get(Opc1).getNumDefs())
274 int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
275 int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
276 if (Offset0Idx == -1 || Offset1Idx == -1)
283 Offset0Idx -=
get(Opc0).NumDefs;
284 Offset1Idx -=
get(Opc1).NumDefs;
314 if (!Load0Offset || !Load1Offset)
331 int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
332 int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
334 if (OffIdx0 == -1 || OffIdx1 == -1)
340 OffIdx0 -=
get(Opc0).NumDefs;
341 OffIdx1 -=
get(Opc1).NumDefs;
360 case AMDGPU::DS_READ2ST64_B32:
361 case AMDGPU::DS_READ2ST64_B64:
362 case AMDGPU::DS_WRITE2ST64_B32:
363 case AMDGPU::DS_WRITE2ST64_B64:
378 OffsetIsScalable =
false;
395 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
397 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
398 if (
Opc == AMDGPU::DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64)
411 unsigned Offset0 = Offset0Op->
getImm() & 0xff;
412 unsigned Offset1 = Offset1Op->
getImm() & 0xff;
413 if (Offset0 + 1 != Offset1)
424 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
432 Offset = EltSize * Offset0;
434 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
435 if (DataOpIdx == -1) {
436 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
438 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
454 if (BaseOp && !BaseOp->
isFI())
462 if (SOffset->
isReg())
468 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
470 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
479 isMIMG(LdSt) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
480 int SRsrcIdx = AMDGPU::getNamedOperandIdx(
Opc, RsrcOpName);
482 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
483 if (VAddr0Idx >= 0) {
485 for (
int I = VAddr0Idx;
I < SRsrcIdx; ++
I)
492 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
507 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::sdst);
524 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
526 DataOpIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdata);
543 if (BaseOps1.
front()->isIdenticalTo(*BaseOps2.
front()))
551 if (MO1->getAddrSpace() != MO2->getAddrSpace())
554 const auto *Base1 = MO1->getValue();
555 const auto *Base2 = MO2->getValue();
556 if (!Base1 || !Base2)
564 return Base1 == Base2;
568 int64_t Offset1,
bool OffsetIsScalable1,
570 int64_t Offset2,
bool OffsetIsScalable2,
571 unsigned ClusterSize,
572 unsigned NumBytes)
const {
585 }
else if (!BaseOps1.
empty() || !BaseOps2.
empty()) {
604 const unsigned LoadSize = NumBytes / ClusterSize;
605 const unsigned NumDWords = ((LoadSize + 3) / 4) * ClusterSize;
606 return NumDWords <= MaxMemoryClusterDWords;
620 int64_t Offset0, int64_t Offset1,
621 unsigned NumLoads)
const {
622 assert(Offset1 > Offset0 &&
623 "Second offset should be larger than first offset!");
628 return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
635 const char *Msg =
"illegal VGPR to SGPR copy") {
656 assert((
TII.getSubtarget().hasMAIInsts() &&
657 !
TII.getSubtarget().hasGFX90AInsts()) &&
658 "Expected GFX908 subtarget.");
661 AMDGPU::AGPR_32RegClass.
contains(SrcReg)) &&
662 "Source register of the copy should be either an SGPR or an AGPR.");
665 "Destination register of the copy should be an AGPR.");
674 for (
auto Def =
MI,
E =
MBB.begin(); Def !=
E; ) {
677 if (!Def->modifiesRegister(SrcReg, &RI))
680 if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
681 Def->getOperand(0).getReg() != SrcReg)
688 bool SafeToPropagate =
true;
691 for (
auto I = Def;
I !=
MI && SafeToPropagate; ++
I)
692 if (
I->modifiesRegister(DefOp.
getReg(), &RI))
693 SafeToPropagate =
false;
695 if (!SafeToPropagate)
698 for (
auto I = Def;
I !=
MI; ++
I)
699 I->clearRegisterKills(DefOp.
getReg(), &RI);
708 if (ImpUseSuperReg) {
709 Builder.addReg(ImpUseSuperReg,
717 RS.enterBasicBlockEnd(
MBB);
718 RS.backward(std::next(
MI));
727 unsigned RegNo = (DestReg - AMDGPU::AGPR0) % 3;
730 assert(
MBB.getParent()->getRegInfo().isReserved(Tmp) &&
731 "VGPR used for an intermediate copy should have been reserved.");
736 Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
MI,
746 unsigned TmpCopyOp = AMDGPU::V_MOV_B32_e32;
747 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg)) {
748 TmpCopyOp = AMDGPU::V_ACCVGPR_READ_B32_e64;
755 if (ImpUseSuperReg) {
756 UseBuilder.
addReg(ImpUseSuperReg,
777 for (
unsigned Idx = 0; Idx < BaseIndices.
size(); ++Idx) {
778 int16_t SubIdx = BaseIndices[Idx];
779 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
780 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
781 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
782 unsigned Opcode = AMDGPU::S_MOV_B32;
785 bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0;
786 bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0;
787 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.
size())) {
791 DestSubReg = RI.getSubReg(DestReg, SubIdx);
792 SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
793 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
794 Opcode = AMDGPU::S_MOV_B64;
809 assert(FirstMI && LastMI);
817 LastMI->addRegisterKilled(SrcReg, &RI);
823 Register SrcReg,
bool KillSrc,
bool RenamableDest,
824 bool RenamableSrc)
const {
826 unsigned Size = RI.getRegSizeInBits(*RC);
828 unsigned SrcSize = RI.getRegSizeInBits(*SrcRC);
834 if (((
Size == 16) != (SrcSize == 16))) {
836 assert(ST.useRealTrue16Insts());
838 MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
841 if (DestReg == SrcReg) {
847 RC = RI.getPhysRegBaseClass(DestReg);
848 Size = RI.getRegSizeInBits(*RC);
849 SrcRC = RI.getPhysRegBaseClass(SrcReg);
850 SrcSize = RI.getRegSizeInBits(*SrcRC);
854 if (RC == &AMDGPU::VGPR_32RegClass) {
856 AMDGPU::SReg_32RegClass.
contains(SrcReg) ||
857 AMDGPU::AGPR_32RegClass.
contains(SrcReg));
858 unsigned Opc = AMDGPU::AGPR_32RegClass.contains(SrcReg) ?
859 AMDGPU::V_ACCVGPR_READ_B32_e64 : AMDGPU::V_MOV_B32_e32;
865 if (RC == &AMDGPU::SReg_32_XM0RegClass ||
866 RC == &AMDGPU::SReg_32RegClass) {
867 if (SrcReg == AMDGPU::SCC) {
874 if (!AMDGPU::SReg_32RegClass.
contains(SrcReg)) {
875 if (DestReg == AMDGPU::VCC_LO) {
893 if (RC == &AMDGPU::SReg_64RegClass) {
894 if (SrcReg == AMDGPU::SCC) {
901 if (!AMDGPU::SReg_64_EncodableRegClass.
contains(SrcReg)) {
902 if (DestReg == AMDGPU::VCC) {
920 if (DestReg == AMDGPU::SCC) {
923 if (AMDGPU::SReg_64RegClass.
contains(SrcReg)) {
927 assert(ST.hasScalarCompareEq64());
941 if (RC == &AMDGPU::AGPR_32RegClass) {
942 if (AMDGPU::VGPR_32RegClass.
contains(SrcReg) ||
943 (ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
949 if (AMDGPU::AGPR_32RegClass.
contains(SrcReg) && ST.hasGFX90AInsts()) {
958 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
965 AMDGPU::SReg_LO16RegClass.
contains(SrcReg) ||
966 AMDGPU::AGPR_LO16RegClass.
contains(SrcReg));
968 bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
969 bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
970 bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
971 bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
974 MCRegister NewDestReg = RI.get32BitRegister(DestReg);
975 MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
988 if (IsAGPRDst || IsAGPRSrc) {
989 if (!DstLow || !SrcLow) {
991 "Cannot use hi16 subreg with an AGPR!");
998 if (ST.useRealTrue16Insts()) {
1004 if (AMDGPU::VGPR_16_Lo128RegClass.
contains(DestReg) &&
1005 (IsSGPRSrc || AMDGPU::VGPR_16_Lo128RegClass.
contains(SrcReg))) {
1017 if (IsSGPRSrc && !ST.hasSDWAScalar()) {
1018 if (!DstLow || !SrcLow) {
1020 "Cannot use hi16 subreg on VI!");
1043 if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
1044 if (ST.hasMovB64()) {
1049 if (ST.hasPkMovB32()) {
1065 const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
1066 if (RI.isSGPRClass(RC)) {
1067 if (!RI.isSGPRClass(SrcRC)) {
1071 const bool CanKillSuperReg = KillSrc && !RI.regsOverlap(SrcReg, DestReg);
1077 unsigned EltSize = 4;
1078 unsigned Opcode = AMDGPU::V_MOV_B32_e32;
1079 if (RI.isAGPRClass(RC)) {
1080 if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
1081 Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
1082 else if (RI.hasVGPRs(SrcRC) ||
1083 (ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
1084 Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
1086 Opcode = AMDGPU::INSTRUCTION_LIST_END;
1087 }
else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
1088 Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
1089 }
else if ((
Size % 64 == 0) && RI.hasVGPRs(RC) &&
1090 (RI.isProperlyAlignedRC(*RC) &&
1091 (SrcRC == RC || RI.isSGPRClass(SrcRC)))) {
1093 if (ST.hasMovB64()) {
1094 Opcode = AMDGPU::V_MOV_B64_e32;
1096 }
else if (ST.hasPkMovB32()) {
1097 Opcode = AMDGPU::V_PK_MOV_B32;
1107 std::unique_ptr<RegScavenger> RS;
1108 if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
1109 RS = std::make_unique<RegScavenger>();
1115 const bool Overlap = RI.regsOverlap(SrcReg, DestReg);
1116 const bool CanKillSuperReg = KillSrc && !Overlap;
1118 for (
unsigned Idx = 0; Idx < SubIndices.
size(); ++Idx) {
1121 SubIdx = SubIndices[Idx];
1123 SubIdx = SubIndices[SubIndices.
size() - Idx - 1];
1124 Register DestSubReg = RI.getSubReg(DestReg, SubIdx);
1125 Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx);
1126 assert(DestSubReg && SrcSubReg &&
"Failed to find subregs!");
1128 bool IsFirstSubreg = Idx == 0;
1129 bool UseKill = CanKillSuperReg && Idx == SubIndices.
size() - 1;
1131 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
1135 *RS, Overlap, ImpDefSuper, ImpUseSuper);
1136 }
else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1182 return &AMDGPU::VGPR_32RegClass;
1195 "Not a VGPR32 reg");
1197 if (
Cond.size() == 1) {
1207 }
else if (
Cond.size() == 2) {
1208 assert(
Cond[0].isImm() &&
"Cond[0] is not an immediate");
1210 case SIInstrInfo::SCC_TRUE: {
1221 case SIInstrInfo::SCC_FALSE: {
1232 case SIInstrInfo::VCCNZ: {
1246 case SIInstrInfo::VCCZ: {
1260 case SIInstrInfo::EXECNZ: {
1273 case SIInstrInfo::EXECZ: {
1323 int64_t &ImmVal)
const {
1324 switch (
MI.getOpcode()) {
1325 case AMDGPU::V_MOV_B32_e32:
1326 case AMDGPU::S_MOV_B32:
1327 case AMDGPU::S_MOVK_I32:
1328 case AMDGPU::S_MOV_B64:
1329 case AMDGPU::V_MOV_B64_e32:
1330 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
1331 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
1332 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
1333 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
1334 case AMDGPU::V_MOV_B64_PSEUDO:
1335 case AMDGPU::V_MOV_B16_t16_e32: {
1339 return MI.getOperand(0).getReg() == Reg;
1344 case AMDGPU::V_MOV_B16_t16_e64: {
1346 if (Src0.
isImm() && !
MI.getOperand(1).getImm()) {
1348 return MI.getOperand(0).getReg() == Reg;
1353 case AMDGPU::S_BREV_B32:
1354 case AMDGPU::V_BFREV_B32_e32:
1355 case AMDGPU::V_BFREV_B32_e64: {
1359 return MI.getOperand(0).getReg() == Reg;
1364 case AMDGPU::S_NOT_B32:
1365 case AMDGPU::V_NOT_B32_e32:
1366 case AMDGPU::V_NOT_B32_e64: {
1369 ImmVal =
static_cast<int64_t
>(~static_cast<int32_t>(Src0.
getImm()));
1370 return MI.getOperand(0).getReg() == Reg;
1380std::optional<int64_t>
1385 if (!
Op.isReg() || !
Op.getReg().isVirtual())
1386 return std::nullopt;
1389 if (Def && Def->isMoveImmediate()) {
1395 return std::nullopt;
1400 if (RI.isAGPRClass(DstRC))
1401 return AMDGPU::COPY;
1402 if (RI.getRegSizeInBits(*DstRC) == 16) {
1405 return RI.isSGPRClass(DstRC) ? AMDGPU::COPY : AMDGPU::V_MOV_B16_t16_e64;
1407 if (RI.getRegSizeInBits(*DstRC) == 32)
1408 return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1409 if (RI.getRegSizeInBits(*DstRC) == 64 && RI.isSGPRClass(DstRC))
1410 return AMDGPU::S_MOV_B64;
1411 if (RI.getRegSizeInBits(*DstRC) == 64 && !RI.isSGPRClass(DstRC))
1412 return AMDGPU::V_MOV_B64_PSEUDO;
1413 return AMDGPU::COPY;
1418 bool IsIndirectSrc)
const {
1419 if (IsIndirectSrc) {
1421 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1);
1423 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2);
1425 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3);
1427 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
1429 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
1431 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
1433 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
1435 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
1437 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9);
1439 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10);
1441 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11);
1443 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12);
1445 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16);
1446 if (VecSize <= 1024)
1447 return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32);
1453 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1);
1455 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2);
1457 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3);
1459 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
1461 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
1463 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
1465 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
1467 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
1469 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9);
1471 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10);
1473 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11);
1475 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12);
1477 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16);
1478 if (VecSize <= 1024)
1479 return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32);
1486 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1488 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1490 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1492 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1494 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1496 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1498 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1500 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1502 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1504 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1506 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1508 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1510 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1511 if (VecSize <= 1024)
1512 return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1519 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1;
1521 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2;
1523 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3;
1525 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
1527 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
1529 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
1531 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
1533 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
1535 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
1537 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
1539 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
1541 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
1543 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
1544 if (VecSize <= 1024)
1545 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32;
1552 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1;
1554 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2;
1556 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4;
1558 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8;
1559 if (VecSize <= 1024)
1560 return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16;
1567 bool IsSGPR)
const {
1579 assert(EltSize == 32 &&
"invalid reg indexing elt size");
1586 return AMDGPU::SI_SPILL_S32_SAVE;
1588 return AMDGPU::SI_SPILL_S64_SAVE;
1590 return AMDGPU::SI_SPILL_S96_SAVE;
1592 return AMDGPU::SI_SPILL_S128_SAVE;
1594 return AMDGPU::SI_SPILL_S160_SAVE;
1596 return AMDGPU::SI_SPILL_S192_SAVE;
1598 return AMDGPU::SI_SPILL_S224_SAVE;
1600 return AMDGPU::SI_SPILL_S256_SAVE;
1602 return AMDGPU::SI_SPILL_S288_SAVE;
1604 return AMDGPU::SI_SPILL_S320_SAVE;
1606 return AMDGPU::SI_SPILL_S352_SAVE;
1608 return AMDGPU::SI_SPILL_S384_SAVE;
1610 return AMDGPU::SI_SPILL_S512_SAVE;
1612 return AMDGPU::SI_SPILL_S1024_SAVE;
1621 return AMDGPU::SI_SPILL_V16_SAVE;
1623 return AMDGPU::SI_SPILL_V32_SAVE;
1625 return AMDGPU::SI_SPILL_V64_SAVE;
1627 return AMDGPU::SI_SPILL_V96_SAVE;
1629 return AMDGPU::SI_SPILL_V128_SAVE;
1631 return AMDGPU::SI_SPILL_V160_SAVE;
1633 return AMDGPU::SI_SPILL_V192_SAVE;
1635 return AMDGPU::SI_SPILL_V224_SAVE;
1637 return AMDGPU::SI_SPILL_V256_SAVE;
1639 return AMDGPU::SI_SPILL_V288_SAVE;
1641 return AMDGPU::SI_SPILL_V320_SAVE;
1643 return AMDGPU::SI_SPILL_V352_SAVE;
1645 return AMDGPU::SI_SPILL_V384_SAVE;
1647 return AMDGPU::SI_SPILL_V512_SAVE;
1649 return AMDGPU::SI_SPILL_V1024_SAVE;
1658 return AMDGPU::SI_SPILL_AV32_SAVE;
1660 return AMDGPU::SI_SPILL_AV64_SAVE;
1662 return AMDGPU::SI_SPILL_AV96_SAVE;
1664 return AMDGPU::SI_SPILL_AV128_SAVE;
1666 return AMDGPU::SI_SPILL_AV160_SAVE;
1668 return AMDGPU::SI_SPILL_AV192_SAVE;
1670 return AMDGPU::SI_SPILL_AV224_SAVE;
1672 return AMDGPU::SI_SPILL_AV256_SAVE;
1674 return AMDGPU::SI_SPILL_AV288_SAVE;
1676 return AMDGPU::SI_SPILL_AV320_SAVE;
1678 return AMDGPU::SI_SPILL_AV352_SAVE;
1680 return AMDGPU::SI_SPILL_AV384_SAVE;
1682 return AMDGPU::SI_SPILL_AV512_SAVE;
1684 return AMDGPU::SI_SPILL_AV1024_SAVE;
1691 bool IsVectorSuperClass) {
1696 if (IsVectorSuperClass)
1697 return AMDGPU::SI_SPILL_WWM_AV32_SAVE;
1699 return AMDGPU::SI_SPILL_WWM_V32_SAVE;
1705 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1712 if (ST.hasMAIInsts())
1731 FrameInfo.getObjectAlign(FrameIndex));
1732 unsigned SpillSize = RI.getSpillSize(*RC);
1735 if (RI.isSGPRClass(RC)) {
1737 assert(SrcReg != AMDGPU::M0 &&
"m0 should not be spilled");
1738 assert(SrcReg != AMDGPU::EXEC_LO && SrcReg != AMDGPU::EXEC_HI &&
1739 SrcReg != AMDGPU::EXEC &&
"exec should not be spilled");
1747 if (SrcReg.
isVirtual() && SpillSize == 4) {
1757 if (RI.spillSGPRToVGPR())
1777 return AMDGPU::SI_SPILL_S32_RESTORE;
1779 return AMDGPU::SI_SPILL_S64_RESTORE;
1781 return AMDGPU::SI_SPILL_S96_RESTORE;
1783 return AMDGPU::SI_SPILL_S128_RESTORE;
1785 return AMDGPU::SI_SPILL_S160_RESTORE;
1787 return AMDGPU::SI_SPILL_S192_RESTORE;
1789 return AMDGPU::SI_SPILL_S224_RESTORE;
1791 return AMDGPU::SI_SPILL_S256_RESTORE;
1793 return AMDGPU::SI_SPILL_S288_RESTORE;
1795 return AMDGPU::SI_SPILL_S320_RESTORE;
1797 return AMDGPU::SI_SPILL_S352_RESTORE;
1799 return AMDGPU::SI_SPILL_S384_RESTORE;
1801 return AMDGPU::SI_SPILL_S512_RESTORE;
1803 return AMDGPU::SI_SPILL_S1024_RESTORE;
1812 return AMDGPU::SI_SPILL_V16_RESTORE;
1814 return AMDGPU::SI_SPILL_V32_RESTORE;
1816 return AMDGPU::SI_SPILL_V64_RESTORE;
1818 return AMDGPU::SI_SPILL_V96_RESTORE;
1820 return AMDGPU::SI_SPILL_V128_RESTORE;
1822 return AMDGPU::SI_SPILL_V160_RESTORE;
1824 return AMDGPU::SI_SPILL_V192_RESTORE;
1826 return AMDGPU::SI_SPILL_V224_RESTORE;
1828 return AMDGPU::SI_SPILL_V256_RESTORE;
1830 return AMDGPU::SI_SPILL_V288_RESTORE;
1832 return AMDGPU::SI_SPILL_V320_RESTORE;
1834 return AMDGPU::SI_SPILL_V352_RESTORE;
1836 return AMDGPU::SI_SPILL_V384_RESTORE;
1838 return AMDGPU::SI_SPILL_V512_RESTORE;
1840 return AMDGPU::SI_SPILL_V1024_RESTORE;
1849 return AMDGPU::SI_SPILL_AV32_RESTORE;
1851 return AMDGPU::SI_SPILL_AV64_RESTORE;
1853 return AMDGPU::SI_SPILL_AV96_RESTORE;
1855 return AMDGPU::SI_SPILL_AV128_RESTORE;
1857 return AMDGPU::SI_SPILL_AV160_RESTORE;
1859 return AMDGPU::SI_SPILL_AV192_RESTORE;
1861 return AMDGPU::SI_SPILL_AV224_RESTORE;
1863 return AMDGPU::SI_SPILL_AV256_RESTORE;
1865 return AMDGPU::SI_SPILL_AV288_RESTORE;
1867 return AMDGPU::SI_SPILL_AV320_RESTORE;
1869 return AMDGPU::SI_SPILL_AV352_RESTORE;
1871 return AMDGPU::SI_SPILL_AV384_RESTORE;
1873 return AMDGPU::SI_SPILL_AV512_RESTORE;
1875 return AMDGPU::SI_SPILL_AV1024_RESTORE;
1882 bool IsVectorSuperClass) {
1887 if (IsVectorSuperClass)
1888 return AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
1890 return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
1896 bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
1903 if (ST.hasMAIInsts())
1906 assert(!RI.isAGPRClass(RC));
1920 unsigned SpillSize = RI.getSpillSize(*RC);
1927 FrameInfo.getObjectAlign(FrameIndex));
1929 if (RI.isSGPRClass(RC)) {
1931 assert(DestReg != AMDGPU::M0 &&
"m0 should not be reloaded into");
1932 assert(DestReg != AMDGPU::EXEC_LO && DestReg != AMDGPU::EXEC_HI &&
1933 DestReg != AMDGPU::EXEC &&
"exec should not be spilled");
1938 if (DestReg.
isVirtual() && SpillSize == 4) {
1943 if (RI.spillSGPRToVGPR())
1969 unsigned Quantity)
const {
1971 unsigned MaxSNopCount = 1u << ST.getSNopBits();
1972 while (Quantity > 0) {
1973 unsigned Arg = std::min(Quantity, MaxSNopCount);
1980 auto *MF =
MBB.getParent();
1983 assert(Info->isEntryFunction());
1985 if (
MBB.succ_empty()) {
1986 bool HasNoTerminator =
MBB.getFirstTerminator() ==
MBB.end();
1987 if (HasNoTerminator) {
1988 if (Info->returnsVoid()) {
2002 constexpr unsigned DoorbellIDMask = 0x3ff;
2003 constexpr unsigned ECQueueWaveAbort = 0x400;
2008 if (!
MBB.succ_empty() || std::next(
MI.getIterator()) !=
MBB.end()) {
2009 MBB.splitAt(
MI,
false);
2013 MBB.addSuccessor(TrapBB);
2023 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::TTMP2)
2027 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_AND_B32), DoorbellRegMasked)
2032 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_OR_B32), SetWaveAbortBit)
2033 .
addUse(DoorbellRegMasked)
2034 .
addImm(ECQueueWaveAbort);
2035 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2036 .
addUse(SetWaveAbortBit);
2039 BuildMI(*TrapBB, TrapBB->
end(),
DL,
get(AMDGPU::S_MOV_B32), AMDGPU::M0)
2050 return MBB.getNextNode();
2054 switch (
MI.getOpcode()) {
2056 if (
MI.isMetaInstruction())
2061 return MI.getOperand(0).getImm() + 1;
2071 switch (
MI.getOpcode()) {
2073 case AMDGPU::S_MOV_B64_term:
2076 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2079 case AMDGPU::S_MOV_B32_term:
2082 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2085 case AMDGPU::S_XOR_B64_term:
2088 MI.setDesc(
get(AMDGPU::S_XOR_B64));
2091 case AMDGPU::S_XOR_B32_term:
2094 MI.setDesc(
get(AMDGPU::S_XOR_B32));
2096 case AMDGPU::S_OR_B64_term:
2099 MI.setDesc(
get(AMDGPU::S_OR_B64));
2101 case AMDGPU::S_OR_B32_term:
2104 MI.setDesc(
get(AMDGPU::S_OR_B32));
2107 case AMDGPU::S_ANDN2_B64_term:
2110 MI.setDesc(
get(AMDGPU::S_ANDN2_B64));
2113 case AMDGPU::S_ANDN2_B32_term:
2116 MI.setDesc(
get(AMDGPU::S_ANDN2_B32));
2119 case AMDGPU::S_AND_B64_term:
2122 MI.setDesc(
get(AMDGPU::S_AND_B64));
2125 case AMDGPU::S_AND_B32_term:
2128 MI.setDesc(
get(AMDGPU::S_AND_B32));
2131 case AMDGPU::S_AND_SAVEEXEC_B64_term:
2134 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B64));
2137 case AMDGPU::S_AND_SAVEEXEC_B32_term:
2140 MI.setDesc(
get(AMDGPU::S_AND_SAVEEXEC_B32));
2143 case AMDGPU::SI_SPILL_S32_TO_VGPR:
2144 MI.setDesc(
get(AMDGPU::V_WRITELANE_B32));
2147 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
2148 MI.setDesc(
get(AMDGPU::V_READLANE_B32));
2150 case AMDGPU::AV_MOV_B32_IMM_PSEUDO: {
2154 get(IsAGPR ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::V_MOV_B32_e32));
2157 case AMDGPU::AV_MOV_B64_IMM_PSEUDO: {
2160 int64_t Imm =
MI.getOperand(1).getImm();
2162 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2163 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2168 MI.eraseFromParent();
2174 case AMDGPU::V_MOV_B64_PSEUDO: {
2176 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2177 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2185 if (ST.hasMovB64() && Mov64RC->
contains(Dst)) {
2186 MI.setDesc(Mov64Desc);
2191 if (
SrcOp.isImm()) {
2193 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2194 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2218 if (ST.hasPkMovB32() &&
2237 MI.eraseFromParent();
2240 case AMDGPU::V_MOV_B64_DPP_PSEUDO: {
2244 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2248 if (ST.has64BitLiterals()) {
2249 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2255 MI.setDesc(
get(AMDGPU::S_MOV_B64));
2260 Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
2261 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2263 APInt Lo(32, Imm.getLoBits(32).getZExtValue());
2264 APInt Hi(32, Imm.getHiBits(32).getZExtValue());
2269 MI.eraseFromParent();
2272 case AMDGPU::V_SET_INACTIVE_B32: {
2276 .
add(
MI.getOperand(3))
2277 .
add(
MI.getOperand(4))
2278 .
add(
MI.getOperand(1))
2279 .
add(
MI.getOperand(2))
2280 .
add(
MI.getOperand(5));
2281 MI.eraseFromParent();
2284 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2285 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2286 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2287 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2288 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2289 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2290 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2291 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2292 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2293 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2294 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2295 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2296 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2297 case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2298 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V1:
2299 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V2:
2300 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
2301 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
2302 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
2303 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
2304 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
2305 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
2306 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
2307 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
2308 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
2309 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
2310 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
2311 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
2312 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
2313 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V2:
2314 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V4:
2315 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V8:
2316 case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V16: {
2320 if (RI.hasVGPRs(EltRC)) {
2321 Opc = AMDGPU::V_MOVRELD_B32_e32;
2323 Opc = RI.getRegSizeInBits(*EltRC) == 64 ? AMDGPU::S_MOVRELD_B64
2324 : AMDGPU::S_MOVRELD_B32;
2329 bool IsUndef =
MI.getOperand(1).isUndef();
2330 unsigned SubReg =
MI.getOperand(3).getImm();
2331 assert(VecReg ==
MI.getOperand(1).getReg());
2336 .
add(
MI.getOperand(2))
2340 const int ImpDefIdx =
2342 const int ImpUseIdx = ImpDefIdx + 1;
2344 MI.eraseFromParent();
2347 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V1:
2348 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2:
2349 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
2350 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
2351 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
2352 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
2353 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
2354 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
2355 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
2356 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
2357 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V11:
2358 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V12:
2359 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V16:
2360 case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32: {
2361 assert(ST.useVGPRIndexMode());
2363 bool IsUndef =
MI.getOperand(1).isUndef();
2372 const MCInstrDesc &OpDesc =
get(AMDGPU::V_MOV_B32_indirect_write);
2376 .
add(
MI.getOperand(2))
2380 const int ImpDefIdx =
2382 const int ImpUseIdx = ImpDefIdx + 1;
2389 MI.eraseFromParent();
2392 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V1:
2393 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V2:
2394 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
2395 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
2396 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
2397 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
2398 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
2399 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
2400 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
2401 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
2402 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V11:
2403 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V12:
2404 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V16:
2405 case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V32: {
2406 assert(ST.useVGPRIndexMode());
2409 bool IsUndef =
MI.getOperand(1).isUndef();
2413 .
add(
MI.getOperand(2))
2426 MI.eraseFromParent();
2429 case AMDGPU::SI_PC_ADD_REL_OFFSET: {
2432 Register RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
2433 Register RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
2452 if (ST.hasGetPCZeroExtension()) {
2456 BuildMI(MF,
DL,
get(AMDGPU::S_SEXT_I32_I16), RegHi).addReg(RegHi));
2463 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U32), RegLo).addReg(RegLo).add(OpLo));
2473 MI.eraseFromParent();
2476 case AMDGPU::SI_PC_ADD_REL_OFFSET64: {
2486 Op.setOffset(
Op.getOffset() + 4);
2488 BuildMI(MF,
DL,
get(AMDGPU::S_ADD_U64), Reg).addReg(Reg).add(
Op));
2492 MI.eraseFromParent();
2495 case AMDGPU::ENTER_STRICT_WWM: {
2501 case AMDGPU::ENTER_STRICT_WQM: {
2508 MI.eraseFromParent();
2511 case AMDGPU::EXIT_STRICT_WWM:
2512 case AMDGPU::EXIT_STRICT_WQM: {
2518 case AMDGPU::SI_RETURN: {
2532 MI.eraseFromParent();
2536 case AMDGPU::S_MUL_U64_U32_PSEUDO:
2537 case AMDGPU::S_MUL_I64_I32_PSEUDO:
2538 MI.setDesc(
get(AMDGPU::S_MUL_U64));
2541 case AMDGPU::S_GETPC_B64_pseudo:
2542 MI.setDesc(
get(AMDGPU::S_GETPC_B64));
2543 if (ST.hasGetPCZeroExtension()) {
2545 Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
2554 case AMDGPU::V_MAX_BF16_PSEUDO_e64: {
2555 assert(ST.hasBF16PackedInsts());
2556 MI.setDesc(
get(AMDGPU::V_PK_MAX_NUM_BF16));
2567 case AMDGPU::GET_STACK_BASE:
2570 if (ST.getFrameLowering()->mayReserveScratchForCWSR(*
MBB.getParent())) {
2577 Register DestReg =
MI.getOperand(0).getReg();
2587 MI.getOperand(
MI.getNumExplicitOperands()).setIsDead(
false);
2588 MI.getOperand(
MI.getNumExplicitOperands()).setIsUse();
2589 MI.setDesc(
get(AMDGPU::S_CMOVK_I32));
2592 MI.setDesc(
get(AMDGPU::S_MOV_B32));
2595 MI.getNumExplicitOperands());
2613 case AMDGPU::S_MOV_B64:
2614 case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
2623 if (UsedLanes.
all())
2628 unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
2629 unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
2631 bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
2632 bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
2634 if (NeedLo && NeedHi)
2638 int32_t Imm32 = NeedLo ?
Lo_32(Imm64) :
Hi_32(Imm64);
2640 unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
2649 case AMDGPU::S_LOAD_DWORDX16_IMM:
2650 case AMDGPU::S_LOAD_DWORDX8_IMM: {
2663 for (
auto &CandMO :
I->operands()) {
2664 if (!CandMO.isReg() || CandMO.getReg() != RegToFind || CandMO.isDef())
2672 if (!UseMO || UseMO->
getSubReg() == AMDGPU::NoSubRegister)
2676 unsigned SubregSize = RI.getSubRegIdxSize(UseMO->
getSubReg());
2682 unsigned NewOpcode = -1;
2683 if (SubregSize == 256)
2684 NewOpcode = AMDGPU::S_LOAD_DWORDX8_IMM;
2685 else if (SubregSize == 128)
2686 NewOpcode = AMDGPU::S_LOAD_DWORDX4_IMM;
2696 UseMO->
setSubReg(AMDGPU::NoSubRegister);
2701 MI->getOperand(0).setReg(DestReg);
2702 MI->getOperand(0).setSubReg(AMDGPU::NoSubRegister);
2706 OffsetMO->
setImm(FinalOffset);
2712 MI->setMemRefs(*MF, NewMMOs);
2725std::pair<MachineInstr*, MachineInstr*>
2727 assert (
MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
2729 if (ST.hasMovB64() && ST.hasFeature(AMDGPU::FeatureDPALU_DPP) &&
2732 MI.setDesc(
get(AMDGPU::V_MOV_B64_dpp));
2733 return std::pair(&
MI,
nullptr);
2744 for (
auto Sub : { AMDGPU::sub0, AMDGPU::sub1 }) {
2746 if (Dst.isPhysical()) {
2747 MovDPP.addDef(RI.getSubReg(Dst,
Sub));
2754 for (
unsigned I = 1;
I <= 2; ++
I) {
2757 if (
SrcOp.isImm()) {
2759 Imm.ashrInPlace(Part * 32);
2760 MovDPP.addImm(Imm.getLoBits(32).getZExtValue());
2764 if (Src.isPhysical())
2765 MovDPP.addReg(RI.getSubReg(Src,
Sub));
2772 MovDPP.addImm(MO.getImm());
2774 Split[Part] = MovDPP;
2778 if (Dst.isVirtual())
2785 MI.eraseFromParent();
2786 return std::pair(Split[0], Split[1]);
2789std::optional<DestSourcePair>
2791 if (
MI.getOpcode() == AMDGPU::WWM_COPY)
2794 return std::nullopt;
2798 AMDGPU::OpName Src0OpName,
2800 AMDGPU::OpName Src1OpName)
const {
2807 "All commutable instructions have both src0 and src1 modifiers");
2809 int Src0ModsVal = Src0Mods->
getImm();
2810 int Src1ModsVal = Src1Mods->
getImm();
2812 Src1Mods->
setImm(Src0ModsVal);
2813 Src0Mods->
setImm(Src1ModsVal);
2822 bool IsKill = RegOp.
isKill();
2824 bool IsUndef = RegOp.
isUndef();
2825 bool IsDebug = RegOp.
isDebug();
2827 if (NonRegOp.
isImm())
2829 else if (NonRegOp.
isFI())
2850 int64_t NonRegVal = NonRegOp1.
getImm();
2853 NonRegOp2.
setImm(NonRegVal);
2860 unsigned OpIdx1)
const {
2865 unsigned Opc =
MI.getOpcode();
2866 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2876 if ((
int)OpIdx0 == Src0Idx && !MO0.
isReg() &&
2879 if ((
int)OpIdx1 == Src0Idx && !MO1.
isReg() &&
2884 if ((
int)OpIdx1 != Src0Idx && MO0.
isReg()) {
2890 if ((
int)OpIdx0 != Src0Idx && MO1.
isReg()) {
2905 unsigned Src1Idx)
const {
2906 assert(!NewMI &&
"this should never be used");
2908 unsigned Opc =
MI.getOpcode();
2910 if (CommutedOpcode == -1)
2913 if (Src0Idx > Src1Idx)
2916 assert(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) ==
2917 static_cast<int>(Src0Idx) &&
2918 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1) ==
2919 static_cast<int>(Src1Idx) &&
2920 "inconsistency with findCommutedOpIndices");
2945 Src1, AMDGPU::OpName::src1_modifiers);
2948 AMDGPU::OpName::src1_sel);
2960 unsigned &SrcOpIdx0,
2961 unsigned &SrcOpIdx1)
const {
2966 unsigned &SrcOpIdx0,
2967 unsigned &SrcOpIdx1)
const {
2968 if (!
Desc.isCommutable())
2971 unsigned Opc =
Desc.getOpcode();
2972 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
2976 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
2980 return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
2984 int64_t BrOffset)
const {
3001 return MI.getOperand(0).getMBB();
3006 if (
MI.getOpcode() == AMDGPU::SI_IF ||
MI.getOpcode() == AMDGPU::SI_ELSE ||
3007 MI.getOpcode() == AMDGPU::SI_LOOP)
3019 "new block should be inserted for expanding unconditional branch");
3022 "restore block should be inserted for restoring clobbered registers");
3030 if (ST.useAddPC64Inst()) {
3032 MCCtx.createTempSymbol(
"offset",
true);
3036 MCCtx.createTempSymbol(
"post_addpc",
true);
3037 AddPC->setPostInstrSymbol(*MF, PostAddPCLabel);
3041 Offset->setVariableValue(OffsetExpr);
3045 assert(RS &&
"RegScavenger required for long branching");
3053 const bool FlushSGPRWrites = (ST.isWave64() && ST.hasVALUMaskWriteHazard()) ||
3054 ST.hasVALUReadSGPRHazard();
3055 auto ApplyHazardWorkarounds = [
this, &
MBB, &
I, &
DL, FlushSGPRWrites]() {
3056 if (FlushSGPRWrites)
3064 ApplyHazardWorkarounds();
3067 MCCtx.createTempSymbol(
"post_getpc",
true);
3071 MCCtx.createTempSymbol(
"offset_lo",
true);
3073 MCCtx.createTempSymbol(
"offset_hi",
true);
3076 .
addReg(PCReg, {}, AMDGPU::sub0)
3080 .
addReg(PCReg, {}, AMDGPU::sub1)
3082 ApplyHazardWorkarounds();
3123 if (LongBranchReservedReg) {
3124 RS->enterBasicBlock(
MBB);
3125 Scav = LongBranchReservedReg;
3127 RS->enterBasicBlockEnd(
MBB);
3128 Scav = RS->scavengeRegisterBackwards(
3133 RS->setRegUsed(Scav);
3141 TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
3158unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate
Cond) {
3160 case SIInstrInfo::SCC_TRUE:
3161 return AMDGPU::S_CBRANCH_SCC1;
3162 case SIInstrInfo::SCC_FALSE:
3163 return AMDGPU::S_CBRANCH_SCC0;
3164 case SIInstrInfo::VCCNZ:
3165 return AMDGPU::S_CBRANCH_VCCNZ;
3166 case SIInstrInfo::VCCZ:
3167 return AMDGPU::S_CBRANCH_VCCZ;
3168 case SIInstrInfo::EXECNZ:
3169 return AMDGPU::S_CBRANCH_EXECNZ;
3170 case SIInstrInfo::EXECZ:
3171 return AMDGPU::S_CBRANCH_EXECZ;
3177SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(
unsigned Opcode) {
3179 case AMDGPU::S_CBRANCH_SCC0:
3181 case AMDGPU::S_CBRANCH_SCC1:
3183 case AMDGPU::S_CBRANCH_VCCNZ:
3185 case AMDGPU::S_CBRANCH_VCCZ:
3187 case AMDGPU::S_CBRANCH_EXECNZ:
3189 case AMDGPU::S_CBRANCH_EXECZ:
3201 bool AllowModify)
const {
3202 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3204 TBB =
I->getOperand(0).getMBB();
3208 BranchPredicate Pred = getBranchPredicate(
I->getOpcode());
3209 if (Pred == INVALID_BR)
3214 Cond.push_back(
I->getOperand(1));
3218 if (
I ==
MBB.end()) {
3224 if (
I->getOpcode() == AMDGPU::S_BRANCH) {
3226 FBB =
I->getOperand(0).getMBB();
3236 bool AllowModify)
const {
3244 while (
I != E && !
I->isBranch() && !
I->isReturn()) {
3245 switch (
I->getOpcode()) {
3246 case AMDGPU::S_MOV_B64_term:
3247 case AMDGPU::S_XOR_B64_term:
3248 case AMDGPU::S_OR_B64_term:
3249 case AMDGPU::S_ANDN2_B64_term:
3250 case AMDGPU::S_AND_B64_term:
3251 case AMDGPU::S_AND_SAVEEXEC_B64_term:
3252 case AMDGPU::S_MOV_B32_term:
3253 case AMDGPU::S_XOR_B32_term:
3254 case AMDGPU::S_OR_B32_term:
3255 case AMDGPU::S_ANDN2_B32_term:
3256 case AMDGPU::S_AND_B32_term:
3257 case AMDGPU::S_AND_SAVEEXEC_B32_term:
3260 case AMDGPU::SI_ELSE:
3261 case AMDGPU::SI_KILL_I1_TERMINATOR:
3262 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
3279 int *BytesRemoved)
const {
3281 unsigned RemovedSize = 0;
3284 if (
MI.isBranch() ||
MI.isReturn()) {
3286 MI.eraseFromParent();
3292 *BytesRemoved = RemovedSize;
3309 int *BytesAdded)
const {
3310 if (!FBB &&
Cond.empty()) {
3314 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3321 = getBranchOpcode(
static_cast<BranchPredicate
>(
Cond[0].
getImm()));
3333 *BytesAdded = ST.hasOffset3fBug() ? 8 : 4;
3351 *BytesAdded = ST.hasOffset3fBug() ? 16 : 8;
3358 if (
Cond.size() != 2) {
3362 if (
Cond[0].isImm()) {
3373 Register FalseReg,
int &CondCycles,
3374 int &TrueCycles,
int &FalseCycles)
const {
3384 CondCycles = TrueCycles = FalseCycles = NumInsts;
3387 return RI.hasVGPRs(RC) && NumInsts <= 6;
3401 if (NumInsts % 2 == 0)
3404 CondCycles = TrueCycles = FalseCycles = NumInsts;
3405 return RI.isSGPRClass(RC);
3416 BranchPredicate Pred =
static_cast<BranchPredicate
>(
Cond[0].getImm());
3417 if (Pred == VCCZ || Pred == SCC_FALSE) {
3418 Pred =
static_cast<BranchPredicate
>(-Pred);
3424 unsigned DstSize = RI.getRegSizeInBits(*DstRC);
3426 if (DstSize == 32) {
3428 if (Pred == SCC_TRUE) {
3443 if (DstSize == 64 && Pred == SCC_TRUE) {
3453 static const int16_t Sub0_15[] = {
3454 AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
3455 AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
3456 AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
3457 AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
3460 static const int16_t Sub0_15_64[] = {
3461 AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
3462 AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
3463 AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
3464 AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
3467 unsigned SelOp = AMDGPU::V_CNDMASK_B32_e32;
3469 const int16_t *SubIndices = Sub0_15;
3470 int NElts = DstSize / 32;
3474 if (Pred == SCC_TRUE) {
3476 SelOp = AMDGPU::S_CSELECT_B32;
3477 EltRC = &AMDGPU::SGPR_32RegClass;
3479 SelOp = AMDGPU::S_CSELECT_B64;
3480 EltRC = &AMDGPU::SGPR_64RegClass;
3481 SubIndices = Sub0_15_64;
3487 MBB,
I,
DL,
get(AMDGPU::REG_SEQUENCE), DstReg);
3492 for (
int Idx = 0; Idx != NElts; ++Idx) {
3496 unsigned SubIdx = SubIndices[Idx];
3499 if (SelOp == AMDGPU::V_CNDMASK_B32_e32) {
3501 .
addReg(FalseReg, {}, SubIdx)
3502 .addReg(TrueReg, {}, SubIdx);
3505 .
addReg(TrueReg, {}, SubIdx)
3506 .addReg(FalseReg, {}, SubIdx);
3518 switch (
MI.getOpcode()) {
3519 case AMDGPU::V_MOV_B16_t16_e32:
3520 case AMDGPU::V_MOV_B16_t16_e64:
3521 case AMDGPU::V_MOV_B32_e32:
3522 case AMDGPU::V_MOV_B32_e64:
3523 case AMDGPU::V_MOV_B64_PSEUDO:
3524 case AMDGPU::V_MOV_B64_e32:
3525 case AMDGPU::V_MOV_B64_e64:
3526 case AMDGPU::S_MOV_B32:
3527 case AMDGPU::S_MOV_B64:
3528 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3530 case AMDGPU::WWM_COPY:
3531 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3532 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3533 case AMDGPU::V_ACCVGPR_MOV_B32:
3534 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3535 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3543 switch (
MI.getOpcode()) {
3544 case AMDGPU::V_MOV_B16_t16_e32:
3545 case AMDGPU::V_MOV_B16_t16_e64:
3547 case AMDGPU::V_MOV_B32_e32:
3548 case AMDGPU::V_MOV_B32_e64:
3549 case AMDGPU::V_MOV_B64_PSEUDO:
3550 case AMDGPU::V_MOV_B64_e32:
3551 case AMDGPU::V_MOV_B64_e64:
3552 case AMDGPU::S_MOV_B32:
3553 case AMDGPU::S_MOV_B64:
3554 case AMDGPU::S_MOV_B64_IMM_PSEUDO:
3556 case AMDGPU::WWM_COPY:
3557 case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
3558 case AMDGPU::V_ACCVGPR_READ_B32_e64:
3559 case AMDGPU::V_ACCVGPR_MOV_B32:
3560 case AMDGPU::AV_MOV_B32_IMM_PSEUDO:
3561 case AMDGPU::AV_MOV_B64_IMM_PSEUDO:
3569 AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
3570 AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::clamp,
3571 AMDGPU::OpName::omod, AMDGPU::OpName::op_sel};
3574 unsigned Opc =
MI.getOpcode();
3576 int Idx = AMDGPU::getNamedOperandIdx(
Opc, Name);
3578 MI.removeOperand(Idx);
3584 MI.setDesc(NewDesc);
3590 unsigned NumOps =
Desc.getNumOperands() +
Desc.implicit_uses().size() +
3591 Desc.implicit_defs().size();
3593 for (
unsigned I =
MI.getNumOperands() - 1;
I >=
NumOps; --
I)
3594 MI.removeOperand(
I);
3598 unsigned SubRegIndex) {
3599 switch (SubRegIndex) {
3600 case AMDGPU::NoSubRegister:
3610 case AMDGPU::sub1_lo16:
3612 case AMDGPU::sub1_hi16:
3615 return std::nullopt;
3623 case AMDGPU::V_MAC_F16_e32:
3624 case AMDGPU::V_MAC_F16_e64:
3625 case AMDGPU::V_MAD_F16_e64:
3626 return AMDGPU::V_MADAK_F16;
3627 case AMDGPU::V_MAC_F32_e32:
3628 case AMDGPU::V_MAC_F32_e64:
3629 case AMDGPU::V_MAD_F32_e64:
3630 return AMDGPU::V_MADAK_F32;
3631 case AMDGPU::V_FMAC_F32_e32:
3632 case AMDGPU::V_FMAC_F32_e64:
3633 case AMDGPU::V_FMA_F32_e64:
3634 return AMDGPU::V_FMAAK_F32;
3635 case AMDGPU::V_FMAC_F16_e32:
3636 case AMDGPU::V_FMAC_F16_e64:
3637 case AMDGPU::V_FMAC_F16_t16_e64:
3638 case AMDGPU::V_FMAC_F16_fake16_e64:
3639 case AMDGPU::V_FMAC_F16_t16_e32:
3640 case AMDGPU::V_FMAC_F16_fake16_e32:
3641 case AMDGPU::V_FMA_F16_e64:
3642 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3643 ? AMDGPU::V_FMAAK_F16_t16
3644 : AMDGPU::V_FMAAK_F16_fake16
3645 : AMDGPU::V_FMAAK_F16;
3646 case AMDGPU::V_FMAC_F64_e32:
3647 case AMDGPU::V_FMAC_F64_e64:
3648 case AMDGPU::V_FMA_F64_e64:
3649 return AMDGPU::V_FMAAK_F64;
3657 case AMDGPU::V_MAC_F16_e32:
3658 case AMDGPU::V_MAC_F16_e64:
3659 case AMDGPU::V_MAD_F16_e64:
3660 return AMDGPU::V_MADMK_F16;
3661 case AMDGPU::V_MAC_F32_e32:
3662 case AMDGPU::V_MAC_F32_e64:
3663 case AMDGPU::V_MAD_F32_e64:
3664 return AMDGPU::V_MADMK_F32;
3665 case AMDGPU::V_FMAC_F32_e32:
3666 case AMDGPU::V_FMAC_F32_e64:
3667 case AMDGPU::V_FMA_F32_e64:
3668 return AMDGPU::V_FMAMK_F32;
3669 case AMDGPU::V_FMAC_F16_e32:
3670 case AMDGPU::V_FMAC_F16_e64:
3671 case AMDGPU::V_FMAC_F16_t16_e64:
3672 case AMDGPU::V_FMAC_F16_fake16_e64:
3673 case AMDGPU::V_FMAC_F16_t16_e32:
3674 case AMDGPU::V_FMAC_F16_fake16_e32:
3675 case AMDGPU::V_FMA_F16_e64:
3676 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
3677 ? AMDGPU::V_FMAMK_F16_t16
3678 : AMDGPU::V_FMAMK_F16_fake16
3679 : AMDGPU::V_FMAMK_F16;
3680 case AMDGPU::V_FMAC_F64_e32:
3681 case AMDGPU::V_FMAC_F64_e64:
3682 case AMDGPU::V_FMA_F64_e64:
3683 return AMDGPU::V_FMAMK_F64;
3697 assert(!
DefMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3700 if (
Opc == AMDGPU::COPY) {
3701 assert(!
UseMI.getOperand(0).getSubReg() &&
"Expected SSA form");
3708 if (HasMultipleUses) {
3711 unsigned ImmDefSize = RI.getRegSizeInBits(*MRI->
getRegClass(Reg));
3714 if (UseSubReg != AMDGPU::NoSubRegister && ImmDefSize == 64)
3722 if (ImmDefSize == 32 &&
3727 bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
3728 RI.getSubRegIdxSize(UseSubReg) == 16;
3731 if (RI.hasVGPRs(DstRC))
3734 if (DstReg.
isVirtual() && UseSubReg != AMDGPU::lo16)
3740 unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
3747 for (
unsigned MovOp :
3748 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
3749 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
3757 MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
3761 if (MovDstPhysReg) {
3765 RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
3772 if (MovDstPhysReg) {
3773 if (!MovDstRC->
contains(MovDstPhysReg))
3789 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
3797 if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
3801 UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
3803 UseMI.getOperand(0).setReg(MovDstPhysReg);
3808 UseMI.setDesc(NewMCID);
3809 UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
3810 UseMI.addImplicitDefUseOperands(*MF);
3814 if (HasMultipleUses)
3817 if (
Opc == AMDGPU::V_MAD_F32_e64 ||
Opc == AMDGPU::V_MAC_F32_e64 ||
3818 Opc == AMDGPU::V_MAD_F16_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3819 Opc == AMDGPU::V_FMA_F32_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 ||
3820 Opc == AMDGPU::V_FMA_F16_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64 ||
3821 Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3822 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
Opc == AMDGPU::V_FMA_F64_e64 ||
3823 Opc == AMDGPU::V_FMAC_F64_e64) {
3832 int Src0Idx = getNamedOperandIdx(
UseMI.getOpcode(), AMDGPU::OpName::src0);
3843 auto CopyRegOperandToNarrowerRC =
3846 if (!
MI.getOperand(OpNo).isReg())
3850 if (RI.getCommonSubClass(RC, NewRC) != NewRC)
3853 BuildMI(*
MI.getParent(),
MI.getIterator(),
MI.getDebugLoc(),
3854 get(AMDGPU::COPY), Tmp)
3856 MI.getOperand(OpNo).setReg(Tmp);
3857 MI.getOperand(OpNo).setIsKill();
3864 Src1->
isReg() && Src1->
getReg() == Reg ? Src0 : Src1;
3865 if (!RegSrc->
isReg())
3868 ST.getConstantBusLimit(
Opc) < 2)
3883 if (Def && Def->isMoveImmediate() &&
3898 unsigned SrcSubReg = RegSrc->
getSubReg();
3903 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3904 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3905 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3906 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3907 UseMI.untieRegOperand(
3908 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3915 if (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3916 NewOpc == AMDGPU::V_FMAMK_F16_fake16) {
3920 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
3921 UseMI.getOperand(0).getReg())
3923 UseMI.getOperand(0).setReg(Tmp);
3924 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
3925 CopyRegOperandToNarrowerRC(
UseMI, 3, NewRC);
3930 DefMI.eraseFromParent();
3937 if (ST.getConstantBusLimit(
Opc) < 2) {
3940 bool Src0Inlined =
false;
3941 if (Src0->
isReg()) {
3946 if (Def && Def->isMoveImmediate() &&
3951 }
else if (ST.getConstantBusLimit(
Opc) <= 1 &&
3952 RI.isSGPRReg(*MRI, Src0->
getReg())) {
3958 if (Src1->
isReg() && !Src0Inlined) {
3961 if (Def && Def->isMoveImmediate() &&
3965 else if (RI.isSGPRReg(*MRI, Src1->
getReg()))
3978 if (
Opc == AMDGPU::V_MAC_F32_e64 ||
Opc == AMDGPU::V_MAC_F16_e64 ||
3979 Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
3980 Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
3981 Opc == AMDGPU::V_FMAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F64_e64)
3982 UseMI.untieRegOperand(
3983 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2));
3985 const std::optional<int64_t> SubRegImm =
3995 if (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3996 NewOpc == AMDGPU::V_FMAAK_F16_fake16) {
4000 UseMI.getDebugLoc(),
get(AMDGPU::COPY),
4001 UseMI.getOperand(0).getReg())
4003 UseMI.getOperand(0).setReg(Tmp);
4004 CopyRegOperandToNarrowerRC(
UseMI, 1, NewRC);
4005 CopyRegOperandToNarrowerRC(
UseMI, 2, NewRC);
4015 DefMI.eraseFromParent();
4027 if (BaseOps1.
size() != BaseOps2.
size())
4029 for (
size_t I = 0,
E = BaseOps1.
size();
I <
E; ++
I) {
4030 if (!BaseOps1[
I]->isIdenticalTo(*BaseOps2[
I]))
4038 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
4039 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
4040 LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
4042 LowOffset + (int)LowWidth.
getValue() <= HighOffset;
4045bool SIInstrInfo::checkInstOffsetsDoNotOverlap(
const MachineInstr &MIa,
4048 int64_t Offset0, Offset1;
4051 bool Offset0IsScalable, Offset1IsScalable;
4065 LocationSize Width0 = MIa.
memoperands().front()->getSize();
4066 LocationSize Width1 = MIb.
memoperands().front()->getSize();
4073 "MIa must load from or modify a memory location");
4075 "MIb must load from or modify a memory location");
4097 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4104 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4114 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4128 return checkInstOffsetsDoNotOverlap(MIa, MIb);
4139 if (
Reg.isPhysical())
4143 Imm = Def->getOperand(1).getImm();
4163 unsigned NumOps =
MI.getNumOperands();
4166 if (
Op.isReg() &&
Op.isKill())
4174 case AMDGPU::V_MAC_F16_e32:
4175 case AMDGPU::V_MAC_F16_e64:
4176 return AMDGPU::V_MAD_F16_e64;
4177 case AMDGPU::V_MAC_F32_e32:
4178 case AMDGPU::V_MAC_F32_e64:
4179 return AMDGPU::V_MAD_F32_e64;
4180 case AMDGPU::V_MAC_LEGACY_F32_e32:
4181 case AMDGPU::V_MAC_LEGACY_F32_e64:
4182 return AMDGPU::V_MAD_LEGACY_F32_e64;
4183 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4184 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4185 return AMDGPU::V_FMA_LEGACY_F32_e64;
4186 case AMDGPU::V_FMAC_F16_e32:
4187 case AMDGPU::V_FMAC_F16_e64:
4188 case AMDGPU::V_FMAC_F16_t16_e64:
4189 case AMDGPU::V_FMAC_F16_fake16_e64:
4190 return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts()
4191 ? AMDGPU::V_FMA_F16_gfx9_t16_e64
4192 : AMDGPU::V_FMA_F16_gfx9_fake16_e64
4193 : AMDGPU::V_FMA_F16_gfx9_e64;
4194 case AMDGPU::V_FMAC_F32_e32:
4195 case AMDGPU::V_FMAC_F32_e64:
4196 return AMDGPU::V_FMA_F32_e64;
4197 case AMDGPU::V_FMAC_F64_e32:
4198 case AMDGPU::V_FMAC_F64_e64:
4199 return AMDGPU::V_FMA_F64_e64;
4219 if (
MI.isBundle()) {
4222 if (
MI.getBundleSize() != 1)
4224 CandidateMI =
MI.getNextNode();
4228 MachineInstr *NewMI = convertToThreeAddressImpl(*CandidateMI, U);
4232 if (
MI.isBundle()) {
4237 MI.untieRegOperand(MO.getOperandNo());
4245 if (Def.isEarlyClobber() && Def.isReg() &&
4250 auto UpdateDefIndex = [&](
LiveRange &LR) {
4251 auto *S = LR.find(OldIndex);
4252 if (S != LR.end() && S->start == OldIndex) {
4253 assert(S->valno && S->valno->def == OldIndex);
4254 S->start = NewIndex;
4255 S->valno->def = NewIndex;
4259 for (
auto &SR : LI.subranges())
4265 if (U.RemoveMIUse) {
4268 Register DefReg = U.RemoveMIUse->getOperand(0).getReg();
4272 U.RemoveMIUse->setDesc(
get(AMDGPU::IMPLICIT_DEF));
4273 U.RemoveMIUse->getOperand(0).setIsDead(
true);
4274 for (
unsigned I = U.RemoveMIUse->getNumOperands() - 1;
I != 0; --
I)
4275 U.RemoveMIUse->removeOperand(
I);
4280 if (
MI.isBundle()) {
4284 if (MO.isReg() && MO.getReg() == DefReg) {
4285 assert(MO.getSubReg() == 0 &&
4286 "tied sub-registers in bundles currently not supported");
4287 MI.removeOperand(MO.getOperandNo());
4304 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4305 MIOp.setIsUndef(
true);
4306 MIOp.setReg(DummyReg);
4310 if (
MI.isBundle()) {
4314 if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4315 MIOp.setIsUndef(
true);
4316 MIOp.setReg(DummyReg);
4329 return MI.isBundle() ? &
MI : NewMI;
4334 ThreeAddressUpdates &U)
const {
4336 unsigned Opc =
MI.getOpcode();
4340 if (NewMFMAOpc != -1) {
4343 for (
unsigned I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I)
4344 MIB.
add(
MI.getOperand(
I));
4352 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I)
4357 assert(
Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
4358 Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
4359 "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be "
4363 bool IsF64 =
Opc == AMDGPU::V_FMAC_F64_e32 ||
Opc == AMDGPU::V_FMAC_F64_e64;
4364 bool IsLegacy =
Opc == AMDGPU::V_MAC_LEGACY_F32_e32 ||
4365 Opc == AMDGPU::V_MAC_LEGACY_F32_e64 ||
4366 Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
4367 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64;
4368 bool Src0Literal =
false;
4373 case AMDGPU::V_MAC_F16_e64:
4374 case AMDGPU::V_FMAC_F16_e64:
4375 case AMDGPU::V_FMAC_F16_t16_e64:
4376 case AMDGPU::V_FMAC_F16_fake16_e64:
4377 case AMDGPU::V_MAC_F32_e64:
4378 case AMDGPU::V_MAC_LEGACY_F32_e64:
4379 case AMDGPU::V_FMAC_F32_e64:
4380 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4381 case AMDGPU::V_FMAC_F64_e64:
4383 case AMDGPU::V_MAC_F16_e32:
4384 case AMDGPU::V_FMAC_F16_e32:
4385 case AMDGPU::V_MAC_F32_e32:
4386 case AMDGPU::V_MAC_LEGACY_F32_e32:
4387 case AMDGPU::V_FMAC_F32_e32:
4388 case AMDGPU::V_FMAC_LEGACY_F32_e32:
4389 case AMDGPU::V_FMAC_F64_e32: {
4390 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
4391 AMDGPU::OpName::src0);
4392 const MachineOperand *Src0 = &
MI.getOperand(Src0Idx);
4403 MachineInstrBuilder MIB;
4406 const MachineOperand *Src0Mods =
4409 const MachineOperand *Src1Mods =
4412 const MachineOperand *Src2Mods =
4418 if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsLegacy &&
4419 (!IsF64 || ST.hasFmaakFmamkF64Insts()) &&
4421 (ST.getConstantBusLimit(
Opc) > 1 || !Src0->
isReg() ||
4423 MachineInstr *
DefMI;
4459 MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
4475 if (Src0Literal && !ST.hasVOP3Literal())
4503 switch (
MI.getOpcode()) {
4504 case AMDGPU::S_SET_GPR_IDX_ON:
4505 case AMDGPU::S_SET_GPR_IDX_MODE:
4506 case AMDGPU::S_SET_GPR_IDX_OFF:
4524 if (
MI.isTerminator() ||
MI.isPosition())
4528 if (
MI.getOpcode() == TargetOpcode::INLINEASM_BR)
4531 if (
MI.getOpcode() == AMDGPU::SCHED_BARRIER &&
MI.getOperand(0).getImm() == 0)
4537 return MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
4538 MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
4539 MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
4540 MI.getOpcode() == AMDGPU::S_SETPRIO ||
4541 MI.getOpcode() == AMDGPU::S_SETPRIO_INC_WG ||
4546 return Opcode == AMDGPU::DS_ORDERED_COUNT ||
4547 Opcode == AMDGPU::DS_ADD_GS_REG_RTN ||
4548 Opcode == AMDGPU::DS_SUB_GS_REG_RTN ||
isGWS(Opcode);
4562 if (
MI.getMF()->getFunction().hasFnAttribute(
"amdgpu-no-flat-scratch-init"))
4567 if (
MI.memoperands_empty())
4572 unsigned AS = Memop->getAddrSpace();
4573 if (AS == AMDGPUAS::FLAT_ADDRESS) {
4574 const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4575 return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4576 *MD, AMDGPUAS::PRIVATE_ADDRESS);
4591 if (
MI.memoperands_empty())
4600 unsigned AS = Memop->getAddrSpace();
4617 if (ST.isTgSplitEnabled())
4622 if (
MI.memoperands_empty())
4627 unsigned AS = Memop->getAddrSpace();
4643 unsigned Opcode =
MI.getOpcode();
4658 if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
4659 isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
4660 Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT ||
4661 Opcode == AMDGPU::S_SETHALT)
4664 if (
MI.isCall() ||
MI.isInlineAsm())
4680 if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
4681 Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
4682 Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
4683 Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
4691 if (
MI.isMetaInstruction())
4695 if (
MI.isCopyLike()) {
4696 if (!RI.isSGPRReg(MRI,
MI.getOperand(0).getReg()))
4700 return MI.readsRegister(AMDGPU::EXEC, &RI);
4711 return !
isSALU(
MI) ||
MI.readsRegister(AMDGPU::EXEC, &RI);
4715 switch (Imm.getBitWidth()) {
4721 ST.hasInv2PiInlineImm());
4724 ST.hasInv2PiInlineImm());
4726 return ST.has16BitInsts() &&
4728 ST.hasInv2PiInlineImm());
4735 APInt IntImm = Imm.bitcastToAPInt();
4737 bool HasInv2Pi = ST.hasInv2PiInlineImm();
4745 return ST.has16BitInsts() &&
4748 return ST.has16BitInsts() &&
4758 switch (OperandType) {
4768 int32_t Trunc =
static_cast<int32_t
>(Imm);
4810 int16_t Trunc =
static_cast<int16_t
>(Imm);
4811 return ST.has16BitInsts() &&
4820 int16_t Trunc =
static_cast<int16_t
>(Imm);
4821 return ST.has16BitInsts() &&
4872 if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
4878 return ST.hasVOP3Literal();
4882 int64_t ImmVal)
const {
4885 if (
isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() &&
4886 OpNo == (
unsigned)AMDGPU::getNamedOperandIdx(InstDesc.
getOpcode(),
4887 AMDGPU::OpName::src2))
4889 return RI.opCanUseInlineConstant(OpInfo.OperandType);
4901 "unexpected imm-like operand kind");
4914 if (Opcode == AMDGPU::V_MUL_LEGACY_F32_e64 && ST.hasGFX90AInsts())
4932 AMDGPU::OpName
OpName)
const {
4934 return Mods && Mods->
getImm();
4947 switch (
MI.getOpcode()) {
4948 default:
return false;
4950 case AMDGPU::V_ADDC_U32_e64:
4951 case AMDGPU::V_SUBB_U32_e64:
4952 case AMDGPU::V_SUBBREV_U32_e64: {
4955 if (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()))
4960 case AMDGPU::V_MAC_F16_e64:
4961 case AMDGPU::V_MAC_F32_e64:
4962 case AMDGPU::V_MAC_LEGACY_F32_e64:
4963 case AMDGPU::V_FMAC_F16_e64:
4964 case AMDGPU::V_FMAC_F16_t16_e64:
4965 case AMDGPU::V_FMAC_F16_fake16_e64:
4966 case AMDGPU::V_FMAC_F32_e64:
4967 case AMDGPU::V_FMAC_F64_e64:
4968 case AMDGPU::V_FMAC_LEGACY_F32_e64:
4969 if (!Src2->
isReg() || !RI.isVGPR(MRI, Src2->
getReg()) ||
4974 case AMDGPU::V_CNDMASK_B32_e64:
4980 if (Src1 && (!Src1->
isReg() || !RI.isVGPR(MRI, Src1->
getReg()) ||
5010 (
Use.getReg() == AMDGPU::VCC ||
Use.getReg() == AMDGPU::VCC_LO)) {
5019 unsigned Op32)
const {
5033 Inst32.
add(
MI.getOperand(
I));
5037 int Idx =
MI.getNumExplicitDefs();
5039 int OpTy =
MI.getDesc().operands()[Idx++].OperandType;
5044 if (AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2) == -1) {
5066 if (Reg == AMDGPU::SGPR_NULL || Reg == AMDGPU::SGPR_NULL64)
5074 return Reg == AMDGPU::VCC || Reg == AMDGPU::VCC_LO || Reg == AMDGPU::M0;
5077 return AMDGPU::SReg_32RegClass.contains(Reg) ||
5078 AMDGPU::SReg_64RegClass.contains(Reg);
5106 switch (MO.getReg()) {
5108 case AMDGPU::VCC_LO:
5109 case AMDGPU::VCC_HI:
5111 case AMDGPU::FLAT_SCR:
5124 switch (
MI.getOpcode()) {
5125 case AMDGPU::V_READLANE_B32:
5126 case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
5127 case AMDGPU::V_WRITELANE_B32:
5128 case AMDGPU::SI_SPILL_S32_TO_VGPR:
5135 if (
MI.isPreISelOpcode() ||
5136 SIInstrInfo::isGenericOpcode(
MI.getOpcode()) ||
5154 return SubReg.
getSubReg() != AMDGPU::NoSubRegister &&
5165 if (RI.isVectorRegister(MRI, SrcReg) && RI.isSGPRReg(MRI, DstReg)) {
5166 ErrInfo =
"illegal copy from vector register to SGPR";
5184 if (!MRI.
isSSA() &&
MI.isCopy())
5185 return verifyCopy(
MI, MRI, ErrInfo);
5187 if (SIInstrInfo::isGenericOpcode(Opcode))
5190 int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
5191 int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
5192 int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
5194 if (Src0Idx == -1) {
5196 Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0X);
5197 Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1X);
5198 Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0Y);
5199 Src3Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vsrc1Y);
5204 if (!
Desc.isVariadic() &&
5205 Desc.getNumOperands() !=
MI.getNumExplicitOperands()) {
5206 ErrInfo =
"Instruction has wrong number of operands.";
5210 if (
MI.isInlineAsm()) {
5223 if (!Reg.isVirtual() && !RC->
contains(Reg)) {
5224 ErrInfo =
"inlineasm operand has incorrect register class.";
5232 if (
isImage(
MI) &&
MI.memoperands_empty() &&
MI.mayLoadOrStore()) {
5233 ErrInfo =
"missing memory operand from image instruction.";
5238 for (
int i = 0, e =
Desc.getNumOperands(); i != e; ++i) {
5241 ErrInfo =
"FPImm Machine Operands are not supported. ISel should bitcast "
5242 "all fp values to integers.";
5247 int16_t RegClass = getOpRegClassID(OpInfo);
5249 switch (OpInfo.OperandType) {
5251 if (
MI.getOperand(i).isImm() ||
MI.getOperand(i).isGlobal()) {
5252 ErrInfo =
"Illegal immediate value for operand.";
5287 ErrInfo =
"Illegal immediate value for operand.";
5295 ErrInfo =
"Expected inline constant for operand.";
5309 if (!
MI.getOperand(i).isImm() && !
MI.getOperand(i).isFI()) {
5310 ErrInfo =
"Expected immediate, but got non-immediate";
5319 if (OpInfo.isGenericType())
5334 if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
5335 Opcode != AMDGPU::V_MOV_B64_PSEUDO && !
isSpill(
MI)) {
5337 if (RI.hasVectorRegisters(RC) && MO.
getSubReg()) {
5339 RI.getSubRegisterClass(RC, MO.
getSubReg())) {
5340 RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.
getSubReg());
5347 if (!RC || !RI.isProperlyAlignedRC(*RC)) {
5348 ErrInfo =
"Subtarget requires even aligned vector registers";
5353 if (RegClass != -1) {
5354 if (Reg.isVirtual())
5359 ErrInfo =
"Operand has incorrect register class.";
5367 if (!ST.hasSDWA()) {
5368 ErrInfo =
"SDWA is not supported on this target";
5372 for (
auto Op : {AMDGPU::OpName::src0_sel, AMDGPU::OpName::src1_sel,
5373 AMDGPU::OpName::dst_sel}) {
5377 int64_t Imm = MO->
getImm();
5379 ErrInfo =
"Invalid SDWA selection";
5384 int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
5386 for (
int OpIdx : {DstIdx, Src0Idx, Src1Idx, Src2Idx}) {
5391 if (!ST.hasSDWAScalar()) {
5393 if (!MO.
isReg() || !RI.hasVGPRs(RI.getRegClassForReg(MRI, MO.
getReg()))) {
5394 ErrInfo =
"Only VGPRs allowed as operands in SDWA instructions on VI";
5401 "Only reg allowed as operands in SDWA instructions on GFX9+";
5407 if (!ST.hasSDWAOmod()) {
5410 if (OMod !=
nullptr &&
5412 ErrInfo =
"OMod not allowed in SDWA instructions on VI";
5417 if (Opcode == AMDGPU::V_CVT_F32_FP8_sdwa ||
5418 Opcode == AMDGPU::V_CVT_F32_BF8_sdwa ||
5419 Opcode == AMDGPU::V_CVT_PK_F32_FP8_sdwa ||
5420 Opcode == AMDGPU::V_CVT_PK_F32_BF8_sdwa) {
5423 unsigned Mods = Src0ModsMO->
getImm();
5426 ErrInfo =
"sext, abs and neg are not allowed on this instruction";
5432 if (
isVOPC(BasicOpcode)) {
5433 if (!ST.hasSDWASdst() && DstIdx != -1) {
5436 if (!Dst.isReg() || Dst.getReg() != AMDGPU::VCC) {
5437 ErrInfo =
"Only VCC allowed as dst in SDWA instructions on VI";
5440 }
else if (!ST.hasSDWAOutModsVOPC()) {
5443 if (Clamp && (!Clamp->
isImm() || Clamp->
getImm() != 0)) {
5444 ErrInfo =
"Clamp not allowed in VOPC SDWA instructions on VI";
5450 if (OMod && (!OMod->
isImm() || OMod->
getImm() != 0)) {
5451 ErrInfo =
"OMod not allowed in VOPC SDWA instructions on VI";
5458 if (DstUnused && DstUnused->isImm() &&
5461 if (!Dst.isReg() || !Dst.isTied()) {
5462 ErrInfo =
"Dst register should have tied register";
5467 MI.getOperand(
MI.findTiedOperandIdx(DstIdx));
5470 "Dst register should be tied to implicit use of preserved register";
5474 ErrInfo =
"Dst register should use same physical register as preserved";
5481 if (
isImage(Opcode) && !
MI.mayStore()) {
5493 if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
5501 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
5505 uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
5506 if (RegCount > DstSize) {
5507 ErrInfo =
"Image instruction returns too many registers for dst "
5516 if (
isVALU(
MI) &&
Desc.getOpcode() != AMDGPU::V_WRITELANE_B32) {
5517 unsigned ConstantBusCount = 0;
5518 bool UsesLiteral =
false;
5521 int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
5525 LiteralVal = &
MI.getOperand(ImmIdx);
5534 for (
int OpIdx : {Src0Idx, Src1Idx, Src2Idx, Src3Idx}) {
5545 }
else if (!MO.
isFI()) {
5552 ErrInfo =
"VOP2/VOP3 instruction uses more than one literal";
5562 if (
llvm::all_of(SGPRsUsed, [
this, SGPRUsed](
unsigned SGPR) {
5563 return !RI.regsOverlap(SGPRUsed, SGPR);
5572 if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
5573 Opcode != AMDGPU::V_WRITELANE_B32) {
5574 ErrInfo =
"VOP* instruction violates constant bus restriction";
5578 if (
isVOP3(
MI) && UsesLiteral && !ST.hasVOP3Literal()) {
5579 ErrInfo =
"VOP3 instruction uses literal";
5586 if (
Desc.getOpcode() == AMDGPU::V_WRITELANE_B32) {
5587 unsigned SGPRCount = 0;
5590 for (
int OpIdx : {Src0Idx, Src1Idx}) {
5598 if (MO.
getReg() != SGPRUsed)
5603 if (SGPRCount > ST.getConstantBusLimit(Opcode)) {
5604 ErrInfo =
"WRITELANE instruction violates constant bus restriction";
5611 if (
Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32_e64 ||
5612 Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64_e64) {
5619 ErrInfo =
"v_div_scale_{f32|f64} require src0 = src1 or src2";
5629 ErrInfo =
"ABS not allowed in VOP3B instructions";
5642 ErrInfo =
"SOP2/SOPC instruction requires too many immediate constants";
5649 if (
Desc.isBranch()) {
5651 ErrInfo =
"invalid branch target for SOPK instruction";
5658 ErrInfo =
"invalid immediate for SOPK instruction";
5663 ErrInfo =
"invalid immediate for SOPK instruction";
5670 if (
Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
5671 Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
5672 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5673 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
5674 const bool IsDst =
Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
5675 Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
5677 const unsigned StaticNumOps =
5678 Desc.getNumOperands() +
Desc.implicit_uses().size();
5679 const unsigned NumImplicitOps = IsDst ? 2 : 1;
5685 if (
MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
5686 ErrInfo =
"missing implicit register operands";
5692 if (!Dst->isUse()) {
5693 ErrInfo =
"v_movreld_b32 vdst should be a use operand";
5698 if (!
MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
5699 UseOpIdx != StaticNumOps + 1) {
5700 ErrInfo =
"movrel implicit operands should be tied";
5707 =
MI.getOperand(StaticNumOps + NumImplicitOps - 1);
5709 !
isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
5710 ErrInfo =
"src0 should be subreg of implicit vector use";
5718 if (!
MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
5719 ErrInfo =
"VALU instruction does not implicitly read exec mask";
5725 if (
MI.mayStore() &&
5730 if (Soff && Soff->
getReg() != AMDGPU::M0) {
5731 ErrInfo =
"scalar stores must use m0 as offset register";
5737 if (
isFLAT(
MI) && !ST.hasFlatInstOffsets()) {
5739 if (
Offset->getImm() != 0) {
5740 ErrInfo =
"subtarget does not support offsets in flat instructions";
5745 if (
isDS(
MI) && !ST.hasGDS()) {
5747 if (GDSOp && GDSOp->
getImm() != 0) {
5748 ErrInfo =
"GDS is not supported on this subtarget";
5756 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opcode,
5757 AMDGPU::OpName::vaddr0);
5758 AMDGPU::OpName RSrcOpName =
5759 isMIMG(
MI) ? AMDGPU::OpName::srsrc : AMDGPU::OpName::rsrc;
5760 int RsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, RSrcOpName);
5768 ErrInfo =
"dim is out of range";
5773 if (ST.hasR128A16()) {
5775 IsA16 = R128A16->
getImm() != 0;
5776 }
else if (ST.hasA16()) {
5778 IsA16 = A16->
getImm() != 0;
5781 bool IsNSA = RsrcIdx - VAddr0Idx > 1;
5783 unsigned AddrWords =
5786 unsigned VAddrWords;
5788 VAddrWords = RsrcIdx - VAddr0Idx;
5789 if (ST.hasPartialNSAEncoding() &&
5791 unsigned LastVAddrIdx = RsrcIdx - 1;
5792 VAddrWords +=
getOpSize(
MI, LastVAddrIdx) / 4 - 1;
5800 if (VAddrWords != AddrWords) {
5802 <<
" but got " << VAddrWords <<
"\n");
5803 ErrInfo =
"bad vaddr size";
5813 unsigned DC = DppCt->
getImm();
5814 if (DC == DppCtrl::DPP_UNUSED1 || DC == DppCtrl::DPP_UNUSED2 ||
5815 DC == DppCtrl::DPP_UNUSED3 || DC > DppCtrl::DPP_LAST ||
5816 (DC >= DppCtrl::DPP_UNUSED4_FIRST && DC <= DppCtrl::DPP_UNUSED4_LAST) ||
5817 (DC >= DppCtrl::DPP_UNUSED5_FIRST && DC <= DppCtrl::DPP_UNUSED5_LAST) ||
5818 (DC >= DppCtrl::DPP_UNUSED6_FIRST && DC <= DppCtrl::DPP_UNUSED6_LAST) ||
5819 (DC >= DppCtrl::DPP_UNUSED7_FIRST && DC <= DppCtrl::DPP_UNUSED7_LAST) ||
5820 (DC >= DppCtrl::DPP_UNUSED8_FIRST && DC <= DppCtrl::DPP_UNUSED8_LAST)) {
5821 ErrInfo =
"Invalid dpp_ctrl value";
5824 if (DC >= DppCtrl::WAVE_SHL1 && DC <= DppCtrl::WAVE_ROR1 &&
5825 !ST.hasDPPWavefrontShifts()) {
5826 ErrInfo =
"Invalid dpp_ctrl value: "
5827 "wavefront shifts are not supported on GFX10+";
5830 if (DC >= DppCtrl::BCAST15 && DC <= DppCtrl::BCAST31 &&
5831 !ST.hasDPPBroadcasts()) {
5832 ErrInfo =
"Invalid dpp_ctrl value: "
5833 "broadcasts are not supported on GFX10+";
5836 if (DC >= DppCtrl::ROW_SHARE_FIRST && DC <= DppCtrl::ROW_XMASK_LAST &&
5838 if (DC >= DppCtrl::ROW_NEWBCAST_FIRST &&
5839 DC <= DppCtrl::ROW_NEWBCAST_LAST &&
5840 !ST.hasGFX90AInsts()) {
5841 ErrInfo =
"Invalid dpp_ctrl value: "
5842 "row_newbroadcast/row_share is not supported before "
5846 if (DC > DppCtrl::ROW_NEWBCAST_LAST || !ST.hasGFX90AInsts()) {
5847 ErrInfo =
"Invalid dpp_ctrl value: "
5848 "row_share and row_xmask are not supported before GFX10";
5853 if (Opcode != AMDGPU::V_MOV_B64_DPP_PSEUDO &&
5856 ErrInfo =
"Invalid dpp_ctrl value: "
5857 "DP ALU dpp only support row_newbcast";
5864 AMDGPU::OpName DataName =
5865 isDS(Opcode) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata;
5871 if (ST.hasGFX90AInsts()) {
5872 if (Dst &&
Data && !Dst->isTied() && !
Data->isTied() &&
5873 (RI.isAGPR(MRI, Dst->getReg()) != RI.isAGPR(MRI,
Data->getReg()))) {
5874 ErrInfo =
"Invalid register class: "
5875 "vdata and vdst should be both VGPR or AGPR";
5878 if (
Data && Data2 &&
5879 (RI.isAGPR(MRI,
Data->getReg()) != RI.isAGPR(MRI, Data2->
getReg()))) {
5880 ErrInfo =
"Invalid register class: "
5881 "both data operands should be VGPR or AGPR";
5885 if ((Dst && RI.isAGPR(MRI, Dst->getReg())) ||
5886 (
Data && RI.isAGPR(MRI,
Data->getReg())) ||
5887 (Data2 && RI.isAGPR(MRI, Data2->
getReg()))) {
5888 ErrInfo =
"Invalid register class: "
5889 "agpr loads and stores not supported on this GPU";
5895 if (ST.needsAlignedVGPRs()) {
5896 const auto isAlignedReg = [&
MI, &MRI,
this](AMDGPU::OpName
OpName) ->
bool {
5901 if (Reg.isPhysical())
5902 return !(RI.getHWRegIndex(Reg) & 1);
5904 return RI.getRegSizeInBits(RC) > 32 && RI.isProperlyAlignedRC(RC) &&
5905 !(RI.getChannelFromSubReg(
Op->getSubReg()) & 1);
5908 if (Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
5909 Opcode == AMDGPU::DS_GWS_BARRIER) {
5911 if (!isAlignedReg(AMDGPU::OpName::data0)) {
5912 ErrInfo =
"Subtarget requires even aligned vector registers "
5913 "for DS_GWS instructions";
5919 if (!isAlignedReg(AMDGPU::OpName::vaddr)) {
5920 ErrInfo =
"Subtarget requires even aligned vector registers "
5921 "for vaddr operand of image instructions";
5927 if (Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts()) {
5929 if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
5930 ErrInfo =
"Invalid register class: "
5931 "v_accvgpr_write with an SGPR is not supported on this GPU";
5936 if (
Desc.getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS) {
5939 ErrInfo =
"pseudo expects only physical SGPRs";
5946 if (!ST.hasScaleOffset()) {
5947 ErrInfo =
"Subtarget does not support offset scaling";
5951 ErrInfo =
"Instruction does not support offset scaling";
5960 for (
unsigned I = 0;
I < 3; ++
I) {
5966 if (ST.hasFlatScratchHiInB64InstHazard() &&
isSALU(
MI) &&
5967 MI.readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI,
nullptr)) {
5969 if ((Dst && RI.getRegClassForReg(MRI, Dst->getReg()) ==
5970 &AMDGPU::SReg_64RegClass) ||
5971 Opcode == AMDGPU::S_BITCMP0_B64 || Opcode == AMDGPU::S_BITCMP1_B64) {
5972 ErrInfo =
"Instruction cannot read flat_scratch_base_hi";
5981 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
5983 return MI.getOperand(1).isReg() || RI.isAGPR(MRI,
MI.getOperand(0).getReg())
5985 : AMDGPU::V_MOV_B32_e32;
5995 default:
return AMDGPU::INSTRUCTION_LIST_END;
5996 case AMDGPU::REG_SEQUENCE:
return AMDGPU::REG_SEQUENCE;
5997 case AMDGPU::COPY:
return AMDGPU::COPY;
5998 case AMDGPU::PHI:
return AMDGPU::PHI;
5999 case AMDGPU::INSERT_SUBREG:
return AMDGPU::INSERT_SUBREG;
6000 case AMDGPU::WQM:
return AMDGPU::WQM;
6001 case AMDGPU::SOFT_WQM:
return AMDGPU::SOFT_WQM;
6002 case AMDGPU::STRICT_WWM:
return AMDGPU::STRICT_WWM;
6003 case AMDGPU::STRICT_WQM:
return AMDGPU::STRICT_WQM;
6004 case AMDGPU::S_ADD_I32:
6005 return ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
6006 case AMDGPU::S_ADDC_U32:
6007 return AMDGPU::V_ADDC_U32_e32;
6008 case AMDGPU::S_SUB_I32:
6009 return ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_CO_U32_e32;
6012 case AMDGPU::S_ADD_U32:
6013 return AMDGPU::V_ADD_CO_U32_e32;
6014 case AMDGPU::S_SUB_U32:
6015 return AMDGPU::V_SUB_CO_U32_e32;
6016 case AMDGPU::S_ADD_U64_PSEUDO:
6017 return AMDGPU::V_ADD_U64_PSEUDO;
6018 case AMDGPU::S_SUB_U64_PSEUDO:
6019 return AMDGPU::V_SUB_U64_PSEUDO;
6020 case AMDGPU::S_SUBB_U32:
return AMDGPU::V_SUBB_U32_e32;
6021 case AMDGPU::S_MUL_I32:
return AMDGPU::V_MUL_LO_U32_e64;
6022 case AMDGPU::S_MUL_HI_U32:
return AMDGPU::V_MUL_HI_U32_e64;
6023 case AMDGPU::S_MUL_HI_I32:
return AMDGPU::V_MUL_HI_I32_e64;
6024 case AMDGPU::S_AND_B32:
return AMDGPU::V_AND_B32_e64;
6025 case AMDGPU::S_OR_B32:
return AMDGPU::V_OR_B32_e64;
6026 case AMDGPU::S_XOR_B32:
return AMDGPU::V_XOR_B32_e64;
6027 case AMDGPU::S_XNOR_B32:
6028 return ST.hasDLInsts() ? AMDGPU::V_XNOR_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
6029 case AMDGPU::S_MIN_I32:
return AMDGPU::V_MIN_I32_e64;
6030 case AMDGPU::S_MIN_U32:
return AMDGPU::V_MIN_U32_e64;
6031 case AMDGPU::S_MAX_I32:
return AMDGPU::V_MAX_I32_e64;
6032 case AMDGPU::S_MAX_U32:
return AMDGPU::V_MAX_U32_e64;
6033 case AMDGPU::S_ASHR_I32:
return AMDGPU::V_ASHR_I32_e32;
6034 case AMDGPU::S_ASHR_I64:
return AMDGPU::V_ASHR_I64_e64;
6035 case AMDGPU::S_LSHL_B32:
return AMDGPU::V_LSHL_B32_e32;
6036 case AMDGPU::S_LSHL_B64:
return AMDGPU::V_LSHL_B64_e64;
6037 case AMDGPU::S_LSHR_B32:
return AMDGPU::V_LSHR_B32_e32;
6038 case AMDGPU::S_LSHR_B64:
return AMDGPU::V_LSHR_B64_e64;
6039 case AMDGPU::S_SEXT_I32_I8:
return AMDGPU::V_BFE_I32_e64;
6040 case AMDGPU::S_SEXT_I32_I16:
return AMDGPU::V_BFE_I32_e64;
6041 case AMDGPU::S_BFE_U32:
return AMDGPU::V_BFE_U32_e64;
6042 case AMDGPU::S_BFE_I32:
return AMDGPU::V_BFE_I32_e64;
6043 case AMDGPU::S_BFM_B32:
return AMDGPU::V_BFM_B32_e64;
6044 case AMDGPU::S_BREV_B32:
return AMDGPU::V_BFREV_B32_e32;
6045 case AMDGPU::S_NOT_B32:
return AMDGPU::V_NOT_B32_e32;
6046 case AMDGPU::S_NOT_B64:
return AMDGPU::V_NOT_B32_e32;
6047 case AMDGPU::S_CMP_EQ_I32:
return AMDGPU::V_CMP_EQ_I32_e64;
6048 case AMDGPU::S_CMP_LG_I32:
return AMDGPU::V_CMP_NE_I32_e64;
6049 case AMDGPU::S_CMP_GT_I32:
return AMDGPU::V_CMP_GT_I32_e64;
6050 case AMDGPU::S_CMP_GE_I32:
return AMDGPU::V_CMP_GE_I32_e64;
6051 case AMDGPU::S_CMP_LT_I32:
return AMDGPU::V_CMP_LT_I32_e64;
6052 case AMDGPU::S_CMP_LE_I32:
return AMDGPU::V_CMP_LE_I32_e64;
6053 case AMDGPU::S_CMP_EQ_U32:
return AMDGPU::V_CMP_EQ_U32_e64;
6054 case AMDGPU::S_CMP_LG_U32:
return AMDGPU::V_CMP_NE_U32_e64;
6055 case AMDGPU::S_CMP_GT_U32:
return AMDGPU::V_CMP_GT_U32_e64;
6056 case AMDGPU::S_CMP_GE_U32:
return AMDGPU::V_CMP_GE_U32_e64;
6057 case AMDGPU::S_CMP_LT_U32:
return AMDGPU::V_CMP_LT_U32_e64;
6058 case AMDGPU::S_CMP_LE_U32:
return AMDGPU::V_CMP_LE_U32_e64;
6059 case AMDGPU::S_CMP_EQ_U64:
return AMDGPU::V_CMP_EQ_U64_e64;
6060 case AMDGPU::S_CMP_LG_U64:
return AMDGPU::V_CMP_NE_U64_e64;
6061 case AMDGPU::S_BCNT1_I32_B32:
return AMDGPU::V_BCNT_U32_B32_e64;
6062 case AMDGPU::S_FF1_I32_B32:
return AMDGPU::V_FFBL_B32_e32;
6063 case AMDGPU::S_FLBIT_I32_B32:
return AMDGPU::V_FFBH_U32_e32;
6064 case AMDGPU::S_FLBIT_I32:
return AMDGPU::V_FFBH_I32_e64;
6065 case AMDGPU::S_CBRANCH_SCC0:
return AMDGPU::S_CBRANCH_VCCZ;
6066 case AMDGPU::S_CBRANCH_SCC1:
return AMDGPU::S_CBRANCH_VCCNZ;
6067 case AMDGPU::S_CVT_F32_I32:
return AMDGPU::V_CVT_F32_I32_e64;
6068 case AMDGPU::S_CVT_F32_U32:
return AMDGPU::V_CVT_F32_U32_e64;
6069 case AMDGPU::S_CVT_I32_F32:
return AMDGPU::V_CVT_I32_F32_e64;
6070 case AMDGPU::S_CVT_U32_F32:
return AMDGPU::V_CVT_U32_F32_e64;
6071 case AMDGPU::S_CVT_F32_F16:
6072 case AMDGPU::S_CVT_HI_F32_F16:
6073 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F32_F16_t16_e64
6074 : AMDGPU::V_CVT_F32_F16_fake16_e64;
6075 case AMDGPU::S_CVT_F16_F32:
6076 return ST.useRealTrue16Insts() ? AMDGPU::V_CVT_F16_F32_t16_e64
6077 : AMDGPU::V_CVT_F16_F32_fake16_e64;
6078 case AMDGPU::S_CEIL_F32:
return AMDGPU::V_CEIL_F32_e64;
6079 case AMDGPU::S_FLOOR_F32:
return AMDGPU::V_FLOOR_F32_e64;
6080 case AMDGPU::S_TRUNC_F32:
return AMDGPU::V_TRUNC_F32_e64;
6081 case AMDGPU::S_RNDNE_F32:
return AMDGPU::V_RNDNE_F32_e64;
6082 case AMDGPU::S_CEIL_F16:
6083 return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
6084 : AMDGPU::V_CEIL_F16_fake16_e64;
6085 case AMDGPU::S_FLOOR_F16:
6086 return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
6087 : AMDGPU::V_FLOOR_F16_fake16_e64;
6088 case AMDGPU::S_TRUNC_F16:
6089 return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
6090 : AMDGPU::V_TRUNC_F16_fake16_e64;
6091 case AMDGPU::S_RNDNE_F16:
6092 return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
6093 : AMDGPU::V_RNDNE_F16_fake16_e64;
6094 case AMDGPU::S_ADD_F32:
return AMDGPU::V_ADD_F32_e64;
6095 case AMDGPU::S_SUB_F32:
return AMDGPU::V_SUB_F32_e64;
6096 case AMDGPU::S_MIN_F32:
return AMDGPU::V_MIN_F32_e64;
6097 case AMDGPU::S_MAX_F32:
return AMDGPU::V_MAX_F32_e64;
6098 case AMDGPU::S_MINIMUM_F32:
return AMDGPU::V_MINIMUM_F32_e64;
6099 case AMDGPU::S_MAXIMUM_F32:
return AMDGPU::V_MAXIMUM_F32_e64;
6100 case AMDGPU::S_MUL_F32:
return AMDGPU::V_MUL_F32_e64;
6101 case AMDGPU::S_ADD_F16:
6102 return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
6103 : AMDGPU::V_ADD_F16_fake16_e64;
6104 case AMDGPU::S_SUB_F16:
6105 return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
6106 : AMDGPU::V_SUB_F16_fake16_e64;
6107 case AMDGPU::S_MIN_F16:
6108 return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
6109 : AMDGPU::V_MIN_F16_fake16_e64;
6110 case AMDGPU::S_MAX_F16:
6111 return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
6112 : AMDGPU::V_MAX_F16_fake16_e64;
6113 case AMDGPU::S_MINIMUM_F16:
6114 return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
6115 : AMDGPU::V_MINIMUM_F16_fake16_e64;
6116 case AMDGPU::S_MAXIMUM_F16:
6117 return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
6118 : AMDGPU::V_MAXIMUM_F16_fake16_e64;
6119 case AMDGPU::S_MUL_F16:
6120 return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
6121 : AMDGPU::V_MUL_F16_fake16_e64;
6122 case AMDGPU::S_CVT_PK_RTZ_F16_F32:
return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
6123 case AMDGPU::S_FMAC_F32:
return AMDGPU::V_FMAC_F32_e64;
6124 case AMDGPU::S_FMAC_F16:
6125 return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
6126 : AMDGPU::V_FMAC_F16_fake16_e64;
6127 case AMDGPU::S_FMAMK_F32:
return AMDGPU::V_FMAMK_F32;
6128 case AMDGPU::S_FMAAK_F32:
return AMDGPU::V_FMAAK_F32;
6129 case AMDGPU::S_CMP_LT_F32:
return AMDGPU::V_CMP_LT_F32_e64;
6130 case AMDGPU::S_CMP_EQ_F32:
return AMDGPU::V_CMP_EQ_F32_e64;
6131 case AMDGPU::S_CMP_LE_F32:
return AMDGPU::V_CMP_LE_F32_e64;
6132 case AMDGPU::S_CMP_GT_F32:
return AMDGPU::V_CMP_GT_F32_e64;
6133 case AMDGPU::S_CMP_LG_F32:
return AMDGPU::V_CMP_LG_F32_e64;
6134 case AMDGPU::S_CMP_GE_F32:
return AMDGPU::V_CMP_GE_F32_e64;
6135 case AMDGPU::S_CMP_O_F32:
return AMDGPU::V_CMP_O_F32_e64;
6136 case AMDGPU::S_CMP_U_F32:
return AMDGPU::V_CMP_U_F32_e64;
6137 case AMDGPU::S_CMP_NGE_F32:
return AMDGPU::V_CMP_NGE_F32_e64;
6138 case AMDGPU::S_CMP_NLG_F32:
return AMDGPU::V_CMP_NLG_F32_e64;
6139 case AMDGPU::S_CMP_NGT_F32:
return AMDGPU::V_CMP_NGT_F32_e64;
6140 case AMDGPU::S_CMP_NLE_F32:
return AMDGPU::V_CMP_NLE_F32_e64;
6141 case AMDGPU::S_CMP_NEQ_F32:
return AMDGPU::V_CMP_NEQ_F32_e64;
6142 case AMDGPU::S_CMP_NLT_F32:
return AMDGPU::V_CMP_NLT_F32_e64;
6143 case AMDGPU::S_CMP_LT_F16:
6144 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LT_F16_t16_e64
6145 : AMDGPU::V_CMP_LT_F16_fake16_e64;
6146 case AMDGPU::S_CMP_EQ_F16:
6147 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_EQ_F16_t16_e64
6148 : AMDGPU::V_CMP_EQ_F16_fake16_e64;
6149 case AMDGPU::S_CMP_LE_F16:
6150 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LE_F16_t16_e64
6151 : AMDGPU::V_CMP_LE_F16_fake16_e64;
6152 case AMDGPU::S_CMP_GT_F16:
6153 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GT_F16_t16_e64
6154 : AMDGPU::V_CMP_GT_F16_fake16_e64;
6155 case AMDGPU::S_CMP_LG_F16:
6156 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_LG_F16_t16_e64
6157 : AMDGPU::V_CMP_LG_F16_fake16_e64;
6158 case AMDGPU::S_CMP_GE_F16:
6159 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_GE_F16_t16_e64
6160 : AMDGPU::V_CMP_GE_F16_fake16_e64;
6161 case AMDGPU::S_CMP_O_F16:
6162 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_O_F16_t16_e64
6163 : AMDGPU::V_CMP_O_F16_fake16_e64;
6164 case AMDGPU::S_CMP_U_F16:
6165 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_U_F16_t16_e64
6166 : AMDGPU::V_CMP_U_F16_fake16_e64;
6167 case AMDGPU::S_CMP_NGE_F16:
6168 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGE_F16_t16_e64
6169 : AMDGPU::V_CMP_NGE_F16_fake16_e64;
6170 case AMDGPU::S_CMP_NLG_F16:
6171 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLG_F16_t16_e64
6172 : AMDGPU::V_CMP_NLG_F16_fake16_e64;
6173 case AMDGPU::S_CMP_NGT_F16:
6174 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NGT_F16_t16_e64
6175 : AMDGPU::V_CMP_NGT_F16_fake16_e64;
6176 case AMDGPU::S_CMP_NLE_F16:
6177 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLE_F16_t16_e64
6178 : AMDGPU::V_CMP_NLE_F16_fake16_e64;
6179 case AMDGPU::S_CMP_NEQ_F16:
6180 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NEQ_F16_t16_e64
6181 : AMDGPU::V_CMP_NEQ_F16_fake16_e64;
6182 case AMDGPU::S_CMP_NLT_F16:
6183 return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
6184 : AMDGPU::V_CMP_NLT_F16_fake16_e64;
6185 case AMDGPU::V_S_EXP_F32_e64:
return AMDGPU::V_EXP_F32_e64;
6186 case AMDGPU::V_S_EXP_F16_e64:
6187 return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
6188 : AMDGPU::V_EXP_F16_fake16_e64;
6189 case AMDGPU::V_S_LOG_F32_e64:
return AMDGPU::V_LOG_F32_e64;
6190 case AMDGPU::V_S_LOG_F16_e64:
6191 return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
6192 : AMDGPU::V_LOG_F16_fake16_e64;
6193 case AMDGPU::V_S_RCP_F32_e64:
return AMDGPU::V_RCP_F32_e64;
6194 case AMDGPU::V_S_RCP_F16_e64:
6195 return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
6196 : AMDGPU::V_RCP_F16_fake16_e64;
6197 case AMDGPU::V_S_RSQ_F32_e64:
return AMDGPU::V_RSQ_F32_e64;
6198 case AMDGPU::V_S_RSQ_F16_e64:
6199 return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
6200 : AMDGPU::V_RSQ_F16_fake16_e64;
6201 case AMDGPU::V_S_SQRT_F32_e64:
return AMDGPU::V_SQRT_F32_e64;
6202 case AMDGPU::V_S_SQRT_F16_e64:
6203 return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
6204 : AMDGPU::V_SQRT_F16_fake16_e64;
6207 "Unexpected scalar opcode without corresponding vector one!");
6256 "Not a whole wave func");
6259 if (
MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_SETUP ||
6260 MI.getOpcode() == AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
6267 unsigned OpNo)
const {
6269 if (
MI.isVariadic() || OpNo >=
Desc.getNumOperands() ||
6270 Desc.operands()[OpNo].RegClass == -1) {
6273 if (Reg.isVirtual()) {
6277 return RI.getPhysRegBaseClass(Reg);
6280 int16_t RegClass = getOpRegClassID(
Desc.operands()[OpNo]);
6281 return RegClass < 0 ? nullptr : RI.getRegClass(RegClass);
6289 unsigned RCID = getOpRegClassID(
get(
MI.getOpcode()).operands()[
OpIdx]);
6291 unsigned Size = RI.getRegSizeInBits(*RC);
6292 unsigned Opcode = (
Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
6293 :
Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
6294 : AMDGPU::V_MOV_B32_e32;
6296 Opcode = AMDGPU::COPY;
6297 else if (RI.isSGPRClass(RC))
6298 Opcode = (
Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
6312 return RI.getSubReg(SuperReg.
getReg(), SubIdx);
6318 unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.
getSubReg(), SubIdx);
6329 if (SubIdx == AMDGPU::sub0)
6331 if (SubIdx == AMDGPU::sub1)
6343void SIInstrInfo::swapOperands(
MachineInstr &Inst)
const {
6359 if (Reg.isPhysical())
6369 return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.
getSubReg()) !=
nullptr;
6372 return RI.getCommonSubClass(DRC, RC) !=
nullptr;
6379 unsigned Opc =
MI.getOpcode();
6385 constexpr AMDGPU::OpName OpNames[] = {
6386 AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
6389 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
I]);
6390 if (
static_cast<unsigned>(SrcIdx) ==
OpIdx &&
6400 bool IsAGPR = RI.isAGPR(MRI, MO.
getReg());
6401 if (IsAGPR && !ST.hasMAIInsts())
6407 const int VDstIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst);
6408 const int DataIdx = AMDGPU::getNamedOperandIdx(
6409 Opc,
isDS(
Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
6410 if ((
int)
OpIdx == VDstIdx && DataIdx != -1 &&
6411 MI.getOperand(DataIdx).isReg() &&
6412 RI.isAGPR(MRI,
MI.getOperand(DataIdx).getReg()) != IsAGPR)
6414 if ((
int)
OpIdx == DataIdx) {
6415 if (VDstIdx != -1 &&
6416 RI.isAGPR(MRI,
MI.getOperand(VDstIdx).getReg()) != IsAGPR)
6419 const int Data1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data1);
6420 if (Data1Idx != -1 &&
MI.getOperand(Data1Idx).isReg() &&
6421 RI.isAGPR(MRI,
MI.getOperand(Data1Idx).getReg()) != IsAGPR)
6426 if (
Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
6427 (
int)
OpIdx == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0) &&
6428 RI.isSGPRReg(MRI, MO.
getReg()))
6431 if (ST.hasFlatScratchHiInB64InstHazard() &&
6438 if (
Opc == AMDGPU::S_BITCMP0_B64 ||
Opc == AMDGPU::S_BITCMP1_B64)
6459 constexpr unsigned NumOps = 3;
6460 constexpr AMDGPU::OpName OpNames[
NumOps * 2] = {
6461 AMDGPU::OpName::src0, AMDGPU::OpName::src1,
6462 AMDGPU::OpName::src2, AMDGPU::OpName::src0_modifiers,
6463 AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
6468 int SrcIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[SrcN]);
6471 MO = &
MI.getOperand(SrcIdx);
6474 if (!MO->
isReg() || !RI.isSGPRReg(MRI, MO->
getReg()))
6478 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OpNames[
NumOps + SrcN]);
6482 unsigned Mods =
MI.getOperand(ModsIdx).getImm();
6486 return !OpSel && !OpSelHi;
6495 int64_t RegClass = getOpRegClassID(OpInfo);
6497 RegClass != -1 ? RI.getRegClass(RegClass) :
nullptr;
6506 int ConstantBusLimit = ST.getConstantBusLimit(
MI.getOpcode());
6507 int LiteralLimit = !
isVOP3(
MI) || ST.hasVOP3Literal() ? 1 : 0;
6511 if (!LiteralLimit--)
6521 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6529 if (--ConstantBusLimit <= 0)
6541 if (!LiteralLimit--)
6543 if (--ConstantBusLimit <= 0)
6549 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
6553 if (!
Op.isReg() && !
Op.isFI() && !
Op.isRegMask() &&
6555 !
Op.isIdenticalTo(*MO))
6565 }
else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
6579 bool Is64BitOp = Is64BitFPOp ||
6586 (!ST.has64BitLiterals() || InstDesc.
getSize() != 4))
6595 if (!Is64BitFPOp && (int32_t)Imm < 0 &&
6613 bool IsGFX950Only = ST.hasGFX950Insts();
6614 bool IsGFX940Only = ST.hasGFX940Insts();
6616 if (!IsGFX950Only && !IsGFX940Only)
6634 unsigned Opcode =
MI.getOpcode();
6636 case AMDGPU::V_CVT_PK_BF8_F32_e64:
6637 case AMDGPU::V_CVT_PK_FP8_F32_e64:
6638 case AMDGPU::V_MQSAD_PK_U16_U8_e64:
6639 case AMDGPU::V_MQSAD_U32_U8_e64:
6640 case AMDGPU::V_PK_ADD_F16:
6641 case AMDGPU::V_PK_ADD_F32:
6642 case AMDGPU::V_PK_ADD_I16:
6643 case AMDGPU::V_PK_ADD_U16:
6644 case AMDGPU::V_PK_ASHRREV_I16:
6645 case AMDGPU::V_PK_FMA_F16:
6646 case AMDGPU::V_PK_FMA_F32:
6647 case AMDGPU::V_PK_FMAC_F16_e32:
6648 case AMDGPU::V_PK_FMAC_F16_e64:
6649 case AMDGPU::V_PK_LSHLREV_B16:
6650 case AMDGPU::V_PK_LSHRREV_B16:
6651 case AMDGPU::V_PK_MAD_I16:
6652 case AMDGPU::V_PK_MAD_U16:
6653 case AMDGPU::V_PK_MAX_F16:
6654 case AMDGPU::V_PK_MAX_I16:
6655 case AMDGPU::V_PK_MAX_U16:
6656 case AMDGPU::V_PK_MIN_F16:
6657 case AMDGPU::V_PK_MIN_I16:
6658 case AMDGPU::V_PK_MIN_U16:
6659 case AMDGPU::V_PK_MOV_B32:
6660 case AMDGPU::V_PK_MUL_F16:
6661 case AMDGPU::V_PK_MUL_F32:
6662 case AMDGPU::V_PK_MUL_LO_U16:
6663 case AMDGPU::V_PK_SUB_I16:
6664 case AMDGPU::V_PK_SUB_U16:
6665 case AMDGPU::V_QSAD_PK_U16_U8_e64:
6674 unsigned Opc =
MI.getOpcode();
6677 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
6680 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
6686 if (HasImplicitSGPR && ST.getConstantBusLimit(
Opc) <= 1 && Src0.
isReg() &&
6687 RI.isSGPRReg(MRI, Src0.
getReg()))
6693 if (
Opc == AMDGPU::V_WRITELANE_B32) {
6695 if (Src0.
isReg() && RI.isVGPR(MRI, Src0.
getReg())) {
6701 if (Src1.
isReg() && RI.isVGPR(MRI, Src1.
getReg())) {
6712 if (
Opc == AMDGPU::V_FMAC_F32_e32 ||
Opc == AMDGPU::V_FMAC_F16_e32) {
6713 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
6714 if (!RI.isVGPR(MRI,
MI.getOperand(Src2Idx).getReg()))
6726 if (
Opc == AMDGPU::V_READLANE_B32 && Src1.
isReg() &&
6727 RI.isVGPR(MRI, Src1.
getReg())) {
6740 if (HasImplicitSGPR || !
MI.isCommutable()) {
6757 if (CommutedOpc == -1) {
6762 MI.setDesc(
get(CommutedOpc));
6766 bool Src0Kill = Src0.
isKill();
6770 else if (Src1.
isReg()) {
6785 unsigned Opc =
MI.getOpcode();
6788 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0),
6789 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1),
6790 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2)
6793 if (
Opc == AMDGPU::V_PERMLANE16_B32_e64 ||
6794 Opc == AMDGPU::V_PERMLANEX16_B32_e64 ||
6795 Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 ||
6796 Opc == AMDGPU::V_PERMLANE_UP_B32_e64 ||
6797 Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 ||
6798 Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 ||
6799 Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) {
6809 if (VOP3Idx[2] != -1) {
6821 int ConstantBusLimit = ST.getConstantBusLimit(
Opc);
6822 int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
6824 Register SGPRReg = findUsedSGPR(
MI, VOP3Idx);
6826 SGPRsUsed.
insert(SGPRReg);
6830 for (
int Idx : VOP3Idx) {
6839 if (LiteralLimit > 0 && ConstantBusLimit > 0) {
6851 if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.
getReg())))
6858 if (ConstantBusLimit > 0) {
6870 if ((
Opc == AMDGPU::V_FMAC_F32_e64 ||
Opc == AMDGPU::V_FMAC_F16_e64) &&
6871 !RI.isVGPR(MRI,
MI.getOperand(VOP3Idx[2]).getReg()))
6877 for (
unsigned I = 0;
I < 3; ++
I) {
6890 SRC = RI.getCommonSubClass(SRC, DstRC);
6893 unsigned SubRegs = RI.getRegSizeInBits(*VRC) / 32;
6895 if (RI.hasAGPRs(VRC)) {
6896 VRC = RI.getEquivalentVGPRClass(VRC);
6899 get(TargetOpcode::COPY), NewSrcReg)
6906 get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
6912 for (
unsigned i = 0; i < SubRegs; ++i) {
6915 get(AMDGPU::V_READFIRSTLANE_B32), SGPR)
6916 .
addReg(SrcReg, {}, RI.getSubRegFromChannel(i));
6922 get(AMDGPU::REG_SEQUENCE), DstReg);
6923 for (
unsigned i = 0; i < SubRegs; ++i) {
6925 MIB.
addImm(RI.getSubRegFromChannel(i));
6938 if (SBase && !RI.isSGPRClass(MRI.
getRegClass(SBase->getReg()))) {
6940 SBase->setReg(SGPR);
6943 if (SOff && !RI.isSGPRReg(MRI, SOff->
getReg())) {
6951 int OldSAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
6952 if (OldSAddrIdx < 0)
6965 if (RI.isSGPRReg(MRI, SAddr.
getReg()))
6968 int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
6969 if (NewVAddrIdx < 0)
6972 int OldVAddrIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
6976 if (OldVAddrIdx >= 0) {
6990 if (OldVAddrIdx == NewVAddrIdx) {
7001 assert(OldSAddrIdx == NewVAddrIdx);
7003 if (OldVAddrIdx >= 0) {
7004 int NewVDstIn = AMDGPU::getNamedOperandIdx(NewOpc,
7005 AMDGPU::OpName::vdst_in);
7009 if (NewVDstIn != -1) {
7010 int OldVDstIn = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vdst_in);
7016 if (NewVDstIn != -1) {
7017 int NewVDst = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
7058 unsigned OpSubReg =
Op.getSubReg();
7061 RI.getRegClassForReg(MRI, OpReg), OpSubReg);
7077 if (Def->isMoveImmediate() && DstRC != &AMDGPU::VReg_1RegClass)
7080 bool ImpDef = Def->isImplicitDef();
7081 while (!ImpDef && Def && Def->isCopy()) {
7082 if (Def->getOperand(1).getReg().isPhysical())
7085 ImpDef = Def && Def->isImplicitDef();
7087 if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
7103 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7107 for (
auto [Idx, ScalarOp] :
enumerate(ScalarOps)) {
7108 unsigned RegSize =
TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
7109 unsigned NumSubRegs =
RegSize / 32;
7110 Register VScalarOp = ScalarOp->getReg();
7112 if (NumSubRegs == 1) {
7115 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
7120 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
7126 CondReg = NewCondReg;
7136 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7137 ScalarOp->setReg(CurReg);
7140 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7141 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7143 ScalarOp->setReg(PhySGPRs[Idx]);
7145 ScalarOp->setIsKill();
7149 assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
7150 "Unhandled register size");
7152 for (
unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
7159 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
7160 .
addReg(VScalarOp, VScalarOpUndef,
TRI->getSubRegFromChannel(Idx));
7163 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
7164 .
addReg(VScalarOp, VScalarOpUndef,
7165 TRI->getSubRegFromChannel(Idx + 1));
7172 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), CurReg)
7182 if (NumSubRegs <= 2)
7183 Cmp.addReg(VScalarOp);
7185 Cmp.addReg(VScalarOp, VScalarOpUndef,
7186 TRI->getSubRegFromChannel(Idx, 2));
7190 CondReg = NewCondReg;
7200 const auto *SScalarOpRC =
7206 BuildMI(LoopBB,
I,
DL,
TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
7207 unsigned Channel = 0;
7208 for (
Register Piece : ReadlanePieces) {
7209 Merge.addReg(Piece).addImm(
TRI->getSubRegFromChannel(Channel++));
7213 if (PhySGPRs.empty() || !PhySGPRs[Idx].isValid())
7214 ScalarOp->setReg(SScalarOp);
7216 BuildMI(*ScalarOp->getParent()->getParent(), ScalarOp->getParent(),
DL,
7217 TII.get(AMDGPU::COPY), PhySGPRs[Idx])
7219 ScalarOp->setReg(PhySGPRs[Idx]);
7221 ScalarOp->setIsKill();
7253 assert((PhySGPRs.empty() || PhySGPRs.size() == ScalarOps.
size()) &&
7254 "Physical SGPRs must be empty or match the number of scalar operands");
7260 if (!Begin.isValid())
7262 if (!End.isValid()) {
7268 const auto *BoolXExecRC =
TRI->getWaveMaskRegClass();
7277 std::numeric_limits<unsigned>::max()) !=
7295 for (
auto I = Begin;
I != AfterMI;
I++) {
7296 for (
auto &MO :
I->all_uses())
7332 for (
auto &Succ : RemainderBB->
successors()) {
7357static std::tuple<unsigned, unsigned>
7365 TII.buildExtractSubReg(
MI, MRI, Rsrc, &AMDGPU::VReg_128RegClass,
7366 AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass);
7373 uint64_t RsrcDataFormat =
TII.getDefaultRsrcDataFormat();
7390 .
addImm(AMDGPU::sub0_sub1)
7396 return std::tuple(RsrcPtr, NewSRsrc);
7433 if (
MI.getOpcode() == AMDGPU::PHI) {
7435 assert(!RI.isSGPRClass(VRC));
7438 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7440 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7456 if (
MI.getOpcode() == AMDGPU::REG_SEQUENCE) {
7459 if (RI.hasVGPRs(DstRC)) {
7463 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
7465 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7483 if (
MI.getOpcode() == AMDGPU::INSERT_SUBREG) {
7488 if (DstRC != Src0RC) {
7497 if (
MI.getOpcode() == AMDGPU::SI_INIT_M0) {
7499 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7505 if (
MI.getOpcode() == AMDGPU::S_BITREPLICATE_B64_B32 ||
7506 MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
7507 MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
7508 MI.getOpcode() == AMDGPU::S_WQM_B32 ||
7509 MI.getOpcode() == AMDGPU::S_WQM_B64 ||
7510 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
7511 MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
7513 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7526 ? AMDGPU::OpName::rsrc
7527 : AMDGPU::OpName::srsrc;
7532 AMDGPU::OpName SampOpName =
7533 isMIMG(
MI) ? AMDGPU::OpName::ssamp : AMDGPU::OpName::samp;
7542 if (
MI.getOpcode() == AMDGPU::SI_CALL_ISEL) {
7550 if (
MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
7554 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
7564 if (
MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d2 ||
7565 MI.getOpcode() == AMDGPU::TENSOR_LOAD_TO_LDS_d4 ||
7566 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d2 ||
7567 MI.getOpcode() == AMDGPU::TENSOR_STORE_FROM_LDS_d4) {
7569 if (Src.isReg() && RI.hasVectorRegisters(MRI.
getRegClass(Src.getReg())))
7576 bool isSoffsetLegal =
true;
7578 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::soffset);
7579 if (SoffsetIdx != -1) {
7583 isSoffsetLegal =
false;
7587 bool isRsrcLegal =
true;
7589 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::srsrc);
7590 if (RsrcIdx != -1) {
7592 if (Rsrc->
isReg() && !RI.isSGPRReg(MRI, Rsrc->
getReg()))
7593 isRsrcLegal =
false;
7597 if (isRsrcLegal && isSoffsetLegal)
7625 const auto *BoolXExecRC = RI.getWaveMaskRegClass();
7629 unsigned RsrcPtr, NewSRsrc;
7636 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7637 .addReg(VAddr->
getReg(), {}, AMDGPU::sub0)
7643 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7644 .addReg(VAddr->
getReg(), {}, AMDGPU::sub1)
7657 }
else if (!VAddr && ST.hasAddr64()) {
7661 "FIXME: Need to emit flat atomics here");
7663 unsigned RsrcPtr, NewSRsrc;
7689 MIB.
addImm(CPol->getImm());
7694 MIB.
addImm(TFE->getImm());
7714 MI.removeFromParent();
7719 .
addReg(RsrcPtr, {}, AMDGPU::sub0)
7720 .addImm(AMDGPU::sub0)
7721 .
addReg(RsrcPtr, {}, AMDGPU::sub1)
7722 .addImm(AMDGPU::sub1);
7725 if (!isSoffsetLegal) {
7736 if (!isSoffsetLegal) {
7748 AMDGPU::getNamedOperandIdx(
MI->getOpcode(), AMDGPU::OpName::srsrc);
7749 if (RsrcIdx != -1) {
7750 DeferredList.insert(
MI);
7755 return DeferredList.contains(
MI);
7765 if (!ST.useRealTrue16Insts())
7768 unsigned Opcode =
MI.getOpcode();
7772 OpIdx >=
get(Opcode).getNumOperands() ||
7773 get(Opcode).operands()[
OpIdx].RegClass == -1)
7777 if (!
Op.isReg() || !
Op.getReg().isVirtual())
7781 if (!RI.isVGPRClass(CurrRC))
7784 int16_t RCID = getOpRegClassID(
get(Opcode).operands()[
OpIdx]);
7786 if (RI.getMatchingSuperRegClass(CurrRC, ExpectedRC, AMDGPU::lo16)) {
7787 Op.setSubReg(AMDGPU::lo16);
7788 }
else if (RI.getMatchingSuperRegClass(ExpectedRC, CurrRC, AMDGPU::lo16)) {
7798 Op.setReg(NewDstReg);
7811 assert(
MI->getOpcode() == AMDGPU::SI_CALL_ISEL &&
7812 "This only handle waterfall for SI_CALL_ISEL");
7819 while (Start->getOpcode() != AMDGPU::ADJCALLSTACKUP)
7822 while (End->getOpcode() != AMDGPU::ADJCALLSTACKDOWN)
7827 while (End !=
MBB.end() && End->isCopy() &&
7828 MI->definesRegister(End->getOperand(1).getReg(), &RI))
7838 while (!Worklist.
empty()) {
7844 moveToVALUImpl(Worklist, MDT, Inst, WaterFalls, V2SPhyCopiesToErase);
7850 moveToVALUImpl(Worklist, MDT, *Inst, WaterFalls, V2SPhyCopiesToErase);
7852 "Deferred MachineInstr are not supposed to re-populate worklist");
7855 for (std::pair<MachineInstr *, V2PhysSCopyInfo> &Entry : WaterFalls) {
7856 if (Entry.first->getOpcode() == AMDGPU::SI_CALL_ISEL)
7858 Entry.second.SGPRs);
7861 for (std::pair<MachineInstr *, bool> Entry : V2SPhyCopiesToErase)
7863 Entry.first->eraseFromParent();
7871 if (SubRegIndices.
size() <= 1) {
7874 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7881 for (int16_t Indice : SubRegIndices) {
7884 get(AMDGPU::V_READFIRSTLANE_B32), NewDst)
7891 get(AMDGPU::REG_SEQUENCE), DstReg);
7892 for (
unsigned i = 0; i < SubRegIndices.size(); ++i) {
7894 MIB.
addImm(RI.getSubRegFromChannel(i));
7904 if (DstReg == AMDGPU::M0) {
7917 if (
I->getOpcode() == AMDGPU::SI_CALL_ISEL) {
7919 for (
unsigned i = 0; i <
UseMI->getNumOperands(); ++i) {
7920 if (
UseMI->getOperand(i).isReg() &&
7921 UseMI->getOperand(i).getReg() == DstReg) {
7925 V2SCopyInfo.MOs.push_back(MO);
7926 V2SCopyInfo.SGPRs.push_back(DstReg);
7930 }
else if (
I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG &&
7931 I->getOperand(0).isReg() &&
7932 I->getOperand(0).getReg() == DstReg) {
7935 }
else if (
I->readsRegister(DstReg, &RI)) {
7937 V2SPhyCopiesToErase[&Inst] =
false;
7939 if (
I->findRegisterDefOperand(DstReg, &RI))
7961 case AMDGPU::S_ADD_I32:
7962 case AMDGPU::S_SUB_I32: {
7966 std::tie(
Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
7974 case AMDGPU::S_MUL_U64:
7975 if (ST.hasVectorMulU64()) {
7976 NewOpcode = AMDGPU::V_MUL_U64_e64;
7980 splitScalarSMulU64(Worklist, Inst, MDT);
7984 case AMDGPU::S_MUL_U64_U32_PSEUDO:
7985 case AMDGPU::S_MUL_I64_I32_PSEUDO:
7988 splitScalarSMulPseudo(Worklist, Inst, MDT);
7992 case AMDGPU::S_AND_B64:
7993 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
7997 case AMDGPU::S_OR_B64:
7998 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
8002 case AMDGPU::S_XOR_B64:
8003 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
8007 case AMDGPU::S_NAND_B64:
8008 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
8012 case AMDGPU::S_NOR_B64:
8013 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
8017 case AMDGPU::S_XNOR_B64:
8018 if (ST.hasDLInsts())
8019 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
8021 splitScalar64BitXnor(Worklist, Inst, MDT);
8025 case AMDGPU::S_ANDN2_B64:
8026 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
8030 case AMDGPU::S_ORN2_B64:
8031 splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
8035 case AMDGPU::S_BREV_B64:
8036 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32,
true);
8040 case AMDGPU::S_NOT_B64:
8041 splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
8045 case AMDGPU::S_BCNT1_I32_B64:
8046 splitScalar64BitBCNT(Worklist, Inst);
8050 case AMDGPU::S_BFE_I64:
8051 splitScalar64BitBFE(Worklist, Inst);
8055 case AMDGPU::S_FLBIT_I32_B64:
8056 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBH_U32_e32);
8059 case AMDGPU::S_FF1_I32_B64:
8060 splitScalar64BitCountOp(Worklist, Inst, AMDGPU::V_FFBL_B32_e32);
8064 case AMDGPU::S_LSHL_B32:
8065 if (ST.hasOnlyRevVALUShifts()) {
8066 NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
8070 case AMDGPU::S_ASHR_I32:
8071 if (ST.hasOnlyRevVALUShifts()) {
8072 NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
8076 case AMDGPU::S_LSHR_B32:
8077 if (ST.hasOnlyRevVALUShifts()) {
8078 NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
8082 case AMDGPU::S_LSHL_B64:
8083 if (ST.hasOnlyRevVALUShifts()) {
8085 ? AMDGPU::V_LSHLREV_B64_pseudo_e64
8086 : AMDGPU::V_LSHLREV_B64_e64;
8090 case AMDGPU::S_ASHR_I64:
8091 if (ST.hasOnlyRevVALUShifts()) {
8092 NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
8096 case AMDGPU::S_LSHR_B64:
8097 if (ST.hasOnlyRevVALUShifts()) {
8098 NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
8103 case AMDGPU::S_ABS_I32:
8104 lowerScalarAbs(Worklist, Inst);
8108 case AMDGPU::S_ABSDIFF_I32:
8109 lowerScalarAbsDiff(Worklist, Inst);
8113 case AMDGPU::S_CBRANCH_SCC0:
8114 case AMDGPU::S_CBRANCH_SCC1: {
8117 bool IsSCC = CondReg == AMDGPU::SCC;
8125 case AMDGPU::S_BFE_U64:
8126 case AMDGPU::S_BFM_B64:
8129 case AMDGPU::S_PACK_LL_B32_B16:
8130 case AMDGPU::S_PACK_LH_B32_B16:
8131 case AMDGPU::S_PACK_HL_B32_B16:
8132 case AMDGPU::S_PACK_HH_B32_B16:
8133 movePackToVALU(Worklist, MRI, Inst);
8137 case AMDGPU::S_XNOR_B32:
8138 lowerScalarXnor(Worklist, Inst);
8142 case AMDGPU::S_NAND_B32:
8143 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
8147 case AMDGPU::S_NOR_B32:
8148 splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
8152 case AMDGPU::S_ANDN2_B32:
8153 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
8157 case AMDGPU::S_ORN2_B32:
8158 splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
8166 case AMDGPU::S_ADD_CO_PSEUDO:
8167 case AMDGPU::S_SUB_CO_PSEUDO: {
8168 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
8169 ? AMDGPU::V_ADDC_U32_e64
8170 : AMDGPU::V_SUBB_U32_e64;
8171 const auto *CarryRC = RI.getWaveMaskRegClass();
8193 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8197 case AMDGPU::S_UADDO_PSEUDO:
8198 case AMDGPU::S_USUBO_PSEUDO: {
8204 unsigned Opc = (Inst.
getOpcode() == AMDGPU::S_UADDO_PSEUDO)
8205 ? AMDGPU::V_ADD_CO_U32_e64
8206 : AMDGPU::V_SUB_CO_U32_e64;
8218 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8222 case AMDGPU::S_LSHL1_ADD_U32:
8223 case AMDGPU::S_LSHL2_ADD_U32:
8224 case AMDGPU::S_LSHL3_ADD_U32:
8225 case AMDGPU::S_LSHL4_ADD_U32: {
8229 unsigned ShiftAmt = (Opcode == AMDGPU::S_LSHL1_ADD_U32 ? 1
8230 : Opcode == AMDGPU::S_LSHL2_ADD_U32 ? 2
8231 : Opcode == AMDGPU::S_LSHL3_ADD_U32 ? 3
8245 addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
8249 case AMDGPU::S_CSELECT_B32:
8250 case AMDGPU::S_CSELECT_B64:
8251 lowerSelect(Worklist, Inst, MDT);
8254 case AMDGPU::S_CMP_EQ_I32:
8255 case AMDGPU::S_CMP_LG_I32:
8256 case AMDGPU::S_CMP_GT_I32:
8257 case AMDGPU::S_CMP_GE_I32:
8258 case AMDGPU::S_CMP_LT_I32:
8259 case AMDGPU::S_CMP_LE_I32:
8260 case AMDGPU::S_CMP_EQ_U32:
8261 case AMDGPU::S_CMP_LG_U32:
8262 case AMDGPU::S_CMP_GT_U32:
8263 case AMDGPU::S_CMP_GE_U32:
8264 case AMDGPU::S_CMP_LT_U32:
8265 case AMDGPU::S_CMP_LE_U32:
8266 case AMDGPU::S_CMP_EQ_U64:
8267 case AMDGPU::S_CMP_LG_U64:
8268 case AMDGPU::S_CMP_LT_F32:
8269 case AMDGPU::S_CMP_EQ_F32:
8270 case AMDGPU::S_CMP_LE_F32:
8271 case AMDGPU::S_CMP_GT_F32:
8272 case AMDGPU::S_CMP_LG_F32:
8273 case AMDGPU::S_CMP_GE_F32:
8274 case AMDGPU::S_CMP_O_F32:
8275 case AMDGPU::S_CMP_U_F32:
8276 case AMDGPU::S_CMP_NGE_F32:
8277 case AMDGPU::S_CMP_NLG_F32:
8278 case AMDGPU::S_CMP_NGT_F32:
8279 case AMDGPU::S_CMP_NLE_F32:
8280 case AMDGPU::S_CMP_NEQ_F32:
8281 case AMDGPU::S_CMP_NLT_F32: {
8286 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >=
8300 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8304 case AMDGPU::S_CMP_LT_F16:
8305 case AMDGPU::S_CMP_EQ_F16:
8306 case AMDGPU::S_CMP_LE_F16:
8307 case AMDGPU::S_CMP_GT_F16:
8308 case AMDGPU::S_CMP_LG_F16:
8309 case AMDGPU::S_CMP_GE_F16:
8310 case AMDGPU::S_CMP_O_F16:
8311 case AMDGPU::S_CMP_U_F16:
8312 case AMDGPU::S_CMP_NGE_F16:
8313 case AMDGPU::S_CMP_NLG_F16:
8314 case AMDGPU::S_CMP_NGT_F16:
8315 case AMDGPU::S_CMP_NLE_F16:
8316 case AMDGPU::S_CMP_NEQ_F16:
8317 case AMDGPU::S_CMP_NLT_F16: {
8340 addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
8344 case AMDGPU::S_CVT_HI_F32_F16: {
8347 if (ST.useRealTrue16Insts()) {
8352 .
addReg(TmpReg, {}, AMDGPU::hi16)
8368 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8372 case AMDGPU::S_MINIMUM_F32:
8373 case AMDGPU::S_MAXIMUM_F32: {
8385 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8389 case AMDGPU::S_MINIMUM_F16:
8390 case AMDGPU::S_MAXIMUM_F16: {
8392 ? &AMDGPU::VGPR_16RegClass
8393 : &AMDGPU::VGPR_32RegClass);
8405 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8409 case AMDGPU::V_S_EXP_F16_e64:
8410 case AMDGPU::V_S_LOG_F16_e64:
8411 case AMDGPU::V_S_RCP_F16_e64:
8412 case AMDGPU::V_S_RSQ_F16_e64:
8413 case AMDGPU::V_S_SQRT_F16_e64: {
8415 ? &AMDGPU::VGPR_16RegClass
8416 : &AMDGPU::VGPR_32RegClass);
8428 addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
8434 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
8442 if (NewOpcode == Opcode) {
8449 V2SPhyCopiesToErase);
8457 RI.getCommonSubClass(NewDstRC, SrcRC)) {
8464 addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
8495 if (ST.useRealTrue16Insts() && Inst.
isCopy() &&
8499 if (RI.getMatchingSuperRegClass(NewDstRC, SrcRegRC, AMDGPU::lo16)) {
8505 get(AMDGPU::REG_SEQUENCE), NewDstReg)
8512 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8514 }
else if (RI.getMatchingSuperRegClass(SrcRegRC, NewDstRC,
8519 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8527 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8537 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8538 AMDGPU::OpName::src0_modifiers) >= 0)
8542 NewInstr->addOperand(Src);
8545 if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
8548 unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
8550 NewInstr.addImm(
Size);
8551 }
else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
8555 }
else if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
8560 "Scalar BFE is only implemented for constant width and offset");
8568 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8569 AMDGPU::OpName::src1_modifiers) >= 0)
8571 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src1) >= 0)
8573 if (AMDGPU::getNamedOperandIdx(NewOpcode,
8574 AMDGPU::OpName::src2_modifiers) >= 0)
8576 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src2) >= 0)
8578 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::clamp) >= 0)
8580 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::omod) >= 0)
8582 if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::op_sel) >= 0)
8588 NewInstr->addOperand(
Op);
8595 if (
Op.getReg() == AMDGPU::SCC) {
8597 if (
Op.isDef() && !
Op.isDead())
8598 addSCCDefUsersToVALUWorklist(
Op, Inst, Worklist);
8600 addSCCDefsToVALUWorklist(NewInstr, Worklist);
8605 if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
8606 Register DstReg = NewInstr->getOperand(0).getReg();
8621 addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
8625std::pair<bool, MachineBasicBlock *>
8628 if (ST.hasAddNoCarryInsts()) {
8640 assert(
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_SUB_I32);
8642 unsigned NewOpc =
Opc == AMDGPU::S_ADD_I32 ?
8643 AMDGPU::V_ADD_U32_e64 : AMDGPU::V_SUB_U32_e64;
8654 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8655 return std::pair(
true, NewBB);
8658 return std::pair(
false,
nullptr);
8675 bool IsSCC = (CondReg == AMDGPU::SCC);
8689 const TargetRegisterClass *TC = RI.getWaveMaskRegClass();
8694 bool CopyFound =
false;
8695 for (MachineInstr &CandI :
8698 if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) !=
8700 if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
8702 .
addReg(CandI.getOperand(1).getReg());
8714 ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
8723 MachineInstr *NewInst;
8724 if (Inst.
getOpcode() == AMDGPU::S_CSELECT_B32) {
8725 NewInst =
BuildMI(
MBB, MII,
DL,
get(AMDGPU::V_CNDMASK_B32_e64), NewDestReg)
8740 addUsersToMoveToVALUWorklist(NewDestReg, MRI, Worklist);
8755 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8756 : AMDGPU::V_SUB_CO_U32_e32;
8767 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8784 unsigned SubOp = ST.hasAddNoCarryInsts() ? AMDGPU::V_SUB_U32_e32
8785 : AMDGPU::V_SUB_CO_U32_e32;
8798 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
8812 if (ST.hasDLInsts()) {
8822 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8828 bool Src0IsSGPR = Src0.
isReg() &&
8830 bool Src1IsSGPR = Src1.
isReg() &&
8844 }
else if (Src1IsSGPR) {
8862 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8868 unsigned Opcode)
const {
8892 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8897 unsigned Opcode)
const {
8921 addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
8936 const MCInstrDesc &InstDesc =
get(Opcode);
8937 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
8939 &AMDGPU::SGPR_32RegClass;
8941 const TargetRegisterClass *Src0SubRC =
8942 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
8945 AMDGPU::sub0, Src0SubRC);
8948 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
8949 const TargetRegisterClass *NewDestSubRC =
8950 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
8953 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0).
add(SrcReg0Sub0);
8956 AMDGPU::sub1, Src0SubRC);
8959 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1).
add(SrcReg0Sub1);
8973 Worklist.
insert(&LoHalf);
8974 Worklist.
insert(&HiHalf);
8980 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9003 const TargetRegisterClass *Src0SubRC =
9004 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9005 if (RI.isSGPRClass(Src0SubRC))
9006 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9007 const TargetRegisterClass *Src1SubRC =
9008 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9009 if (RI.isSGPRClass(Src1SubRC))
9010 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9014 MachineOperand Op0L =
9016 MachineOperand Op1L =
9018 MachineOperand Op0H =
9020 MachineOperand Op1H =
9039 MachineInstr *Op1L_Op0H =
9045 MachineInstr *Op1H_Op0L =
9051 MachineInstr *Carry =
9056 MachineInstr *LoHalf =
9066 MachineInstr *HiHalf =
9089 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9112 const TargetRegisterClass *Src0SubRC =
9113 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9114 if (RI.isSGPRClass(Src0SubRC))
9115 Src0SubRC = RI.getEquivalentVGPRClass(Src0SubRC);
9116 const TargetRegisterClass *Src1SubRC =
9117 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9118 if (RI.isSGPRClass(Src1SubRC))
9119 Src1SubRC = RI.getEquivalentVGPRClass(Src1SubRC);
9123 MachineOperand Op0L =
9125 MachineOperand Op1L =
9129 unsigned NewOpc =
Opc == AMDGPU::S_MUL_U64_U32_PSEUDO
9130 ? AMDGPU::V_MUL_HI_U32_e64
9131 : AMDGPU::V_MUL_HI_I32_e64;
9132 MachineInstr *HiHalf =
9135 MachineInstr *LoHalf =
9154 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9170 const MCInstrDesc &InstDesc =
get(Opcode);
9171 const TargetRegisterClass *Src0RC = Src0.
isReg() ?
9173 &AMDGPU::SGPR_32RegClass;
9175 const TargetRegisterClass *Src0SubRC =
9176 RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
9177 const TargetRegisterClass *Src1RC = Src1.
isReg() ?
9179 &AMDGPU::SGPR_32RegClass;
9181 const TargetRegisterClass *Src1SubRC =
9182 RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
9185 AMDGPU::sub0, Src0SubRC);
9187 AMDGPU::sub0, Src1SubRC);
9189 AMDGPU::sub1, Src0SubRC);
9191 AMDGPU::sub1, Src1SubRC);
9194 const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
9195 const TargetRegisterClass *NewDestSubRC =
9196 RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
9199 MachineInstr &LoHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub0)
9204 MachineInstr &HiHalf = *
BuildMI(
MBB, MII,
DL, InstDesc, DestSub1)
9217 Worklist.
insert(&LoHalf);
9218 Worklist.
insert(&HiHalf);
9221 addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
9241 MachineOperand* Op0;
9242 MachineOperand* Op1;
9244 if (Src0.
isReg() && RI.isSGPRReg(MRI, Src0.
getReg())) {
9277 const MCInstrDesc &InstDesc =
get(AMDGPU::V_BCNT_U32_B32_e64);
9278 const TargetRegisterClass *SrcRC = Src.isReg() ?
9280 &AMDGPU::SGPR_32RegClass;
9285 const TargetRegisterClass *SrcSubRC =
9286 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9289 AMDGPU::sub0, SrcSubRC);
9291 AMDGPU::sub1, SrcSubRC);
9301 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9320 Offset == 0 &&
"Not implemented");
9343 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9353 .
addReg(Src.getReg(), {}, AMDGPU::sub0);
9356 .
addReg(Src.getReg(), {}, AMDGPU::sub0)
9362 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9381 const MCInstrDesc &InstDesc =
get(Opcode);
9383 bool IsCtlz = Opcode == AMDGPU::V_FFBH_U32_e32;
9384 unsigned OpcodeAdd = ST.hasAddNoCarryInsts() ? AMDGPU::V_ADD_U32_e64
9385 : AMDGPU::V_ADD_CO_U32_e32;
9387 const TargetRegisterClass *SrcRC =
9388 Src.isReg() ? MRI.
getRegClass(Src.getReg()) : &AMDGPU::SGPR_32RegClass;
9389 const TargetRegisterClass *SrcSubRC =
9390 RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
9392 MachineOperand SrcRegSub0 =
9394 MachineOperand SrcRegSub1 =
9407 .
addReg(IsCtlz ? MidReg1 : MidReg2)
9413 .
addReg(IsCtlz ? MidReg2 : MidReg1);
9417 addUsersToMoveToVALUWorklist(MidReg4, MRI, Worklist);
9420void SIInstrInfo::addUsersToMoveToVALUWorklist(
9424 MachineInstr &
UseMI = *MO.getParent();
9428 switch (
UseMI.getOpcode()) {
9431 case AMDGPU::SOFT_WQM:
9432 case AMDGPU::STRICT_WWM:
9433 case AMDGPU::STRICT_WQM:
9434 case AMDGPU::REG_SEQUENCE:
9436 case AMDGPU::INSERT_SUBREG:
9439 OpNo = MO.getOperandNo();
9446 if (!RI.hasVectorRegisters(OpRC))
9463 if (ST.useRealTrue16Insts()) {
9465 if (!Src0.
isReg() || !RI.isVGPR(MRI, Src0.
getReg())) {
9468 get(Src0.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg0)
9474 if (!Src1.
isReg() || !RI.isVGPR(MRI, Src1.
getReg())) {
9477 get(Src1.
isImm() ? AMDGPU::V_MOV_B32_e32 : AMDGPU::COPY), SrcReg1)
9486 auto NewMI =
BuildMI(*
MBB, Inst,
DL,
get(AMDGPU::REG_SEQUENCE), ResultReg);
9488 case AMDGPU::S_PACK_LL_B32_B16:
9490 .addReg(SrcReg0, {},
9491 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9492 .addImm(AMDGPU::lo16)
9493 .addReg(SrcReg1, {},
9494 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9495 .addImm(AMDGPU::hi16);
9497 case AMDGPU::S_PACK_LH_B32_B16:
9499 .addReg(SrcReg0, {},
9500 isSrc0Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9501 .addImm(AMDGPU::lo16)
9502 .addReg(SrcReg1, {}, AMDGPU::hi16)
9503 .addImm(AMDGPU::hi16);
9505 case AMDGPU::S_PACK_HL_B32_B16:
9506 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9507 .addImm(AMDGPU::lo16)
9508 .addReg(SrcReg1, {},
9509 isSrc1Reg16 ? AMDGPU::NoSubRegister : AMDGPU::lo16)
9510 .addImm(AMDGPU::hi16);
9512 case AMDGPU::S_PACK_HH_B32_B16:
9513 NewMI.addReg(SrcReg0, {}, AMDGPU::hi16)
9514 .addImm(AMDGPU::lo16)
9515 .addReg(SrcReg1, {}, AMDGPU::hi16)
9516 .addImm(AMDGPU::hi16);
9524 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9529 case AMDGPU::S_PACK_LL_B32_B16: {
9548 case AMDGPU::S_PACK_LH_B32_B16: {
9558 case AMDGPU::S_PACK_HL_B32_B16: {
9569 case AMDGPU::S_PACK_HH_B32_B16: {
9589 addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
9598 assert(
Op.isReg() &&
Op.getReg() == AMDGPU::SCC &&
Op.isDef() &&
9599 !
Op.isDead() &&
Op.getParent() == &SCCDefInst);
9600 SmallVector<MachineInstr *, 4> CopyToDelete;
9603 for (MachineInstr &
MI :
9607 int SCCIdx =
MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI,
false);
9610 MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9611 Register DestReg =
MI.getOperand(0).getReg();
9618 MI.getOperand(SCCIdx).setReg(NewCond);
9624 if (
MI.findRegisterDefOperandIdx(AMDGPU::SCC, &RI,
false,
false) != -1)
9627 for (
auto &Copy : CopyToDelete)
9628 Copy->eraseFromParent();
9636void SIInstrInfo::addSCCDefsToVALUWorklist(
MachineInstr *SCCUseInst,
9642 for (MachineInstr &
MI :
9645 if (
MI.modifiesRegister(AMDGPU::VCC, &RI))
9647 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
9656 const TargetRegisterClass *NewDstRC =
getOpRegClass(Inst, 0);
9664 case AMDGPU::REG_SEQUENCE:
9665 case AMDGPU::INSERT_SUBREG:
9667 case AMDGPU::SOFT_WQM:
9668 case AMDGPU::STRICT_WWM:
9669 case AMDGPU::STRICT_WQM: {
9671 if (RI.isAGPRClass(SrcRC)) {
9672 if (RI.isAGPRClass(NewDstRC))
9677 case AMDGPU::REG_SEQUENCE:
9678 case AMDGPU::INSERT_SUBREG:
9679 NewDstRC = RI.getEquivalentAGPRClass(NewDstRC);
9682 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9688 if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
9691 NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
9705 int OpIndices[3])
const {
9706 const MCInstrDesc &
Desc =
MI.getDesc();
9722 const MachineRegisterInfo &MRI =
MI.getMF()->getRegInfo();
9724 for (
unsigned i = 0; i < 3; ++i) {
9725 int Idx = OpIndices[i];
9729 const MachineOperand &MO =
MI.getOperand(Idx);
9735 const TargetRegisterClass *OpRC =
9736 RI.getRegClass(getOpRegClassID(
Desc.operands()[Idx]));
9737 bool IsRequiredSGPR = RI.isSGPRClass(OpRC);
9744 if (RI.isSGPRClass(RegRC))
9762 if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
9763 SGPRReg = UsedSGPRs[0];
9766 if (!SGPRReg && UsedSGPRs[1]) {
9767 if (UsedSGPRs[1] == UsedSGPRs[2])
9768 SGPRReg = UsedSGPRs[1];
9775 AMDGPU::OpName OperandName)
const {
9776 if (OperandName == AMDGPU::OpName::NUM_OPERAND_NAMES)
9779 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), OperandName);
9783 return &
MI.getOperand(Idx);
9797 if (ST.isAmdHsaOS()) {
9800 RsrcDataFormat |= (1ULL << 56);
9805 RsrcDataFormat |= (2ULL << 59);
9808 return RsrcDataFormat;
9818 uint64_t EltSizeValue =
Log2_32(ST.getMaxPrivateElementSize(
true)) - 1;
9823 uint64_t IndexStride = ST.isWave64() ? 3 : 2;
9830 Rsrc23 &=
~AMDGPU::RSRC_DATA_FORMAT;
9836 unsigned Opc =
MI.getOpcode();
9842 return get(
Opc).mayLoad() &&
9849 if (!Addr || !Addr->
isFI())
9858 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata);
9860 return MI.getOperand(VDataIdx).getReg();
9870 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::data);
9872 return MI.getOperand(DataIdx).getReg();
9909 while (++
I != E &&
I->isInsideBundle()) {
9910 assert(!
I->isBundle() &&
"No nested bundle!");
9918 unsigned Opc =
MI.getOpcode();
9920 unsigned DescSize =
Desc.getSize();
9925 unsigned Size = DescSize;
9929 if (
MI.isBranch() && ST.hasOffset3fBug())
9940 bool HasLiteral =
false;
9941 unsigned LiteralSize = 4;
9942 for (
int I = 0, E =
MI.getNumExplicitOperands();
I != E; ++
I) {
9947 if (ST.has64BitLiterals()) {
9948 switch (OpInfo.OperandType) {
9971 return HasLiteral ? DescSize + LiteralSize : DescSize;
9976 int VAddr0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr0);
9980 int RSrcIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::srsrc);
9981 return 8 + 4 * ((RSrcIdx - VAddr0Idx + 2) / 4);
9985 case TargetOpcode::BUNDLE:
9987 case TargetOpcode::INLINEASM:
9988 case TargetOpcode::INLINEASM_BR: {
9990 const char *AsmStr =
MI.getOperand(0).getSymbolName();
9994 if (
MI.isMetaInstruction())
9998 const auto *D16Info = AMDGPU::getT16D16Helper(
Opc);
10001 unsigned LoInstOpcode = D16Info->LoOp;
10003 DescSize =
Desc.getSize();
10007 if (
Opc == AMDGPU::V_FMA_MIX_F16_t16 ||
Opc == AMDGPU::V_FMA_MIX_BF16_t16) {
10010 DescSize =
Desc.getSize();
10021 if (
MI.memoperands_empty())
10033 static const std::pair<int, const char *> TargetIndices[] = {
10072std::pair<unsigned, unsigned>
10079 static const std::pair<unsigned, const char *> TargetFlags[] = {
10097 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
10113 return AMDGPU::WWM_COPY;
10115 return AMDGPU::COPY;
10132 if (!IsLRSplitInst && Opcode != AMDGPU::IMPLICIT_DEF)
10136 if (RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)))
10137 return IsLRSplitInst;
10150 bool IsNullOrVectorRegister =
true;
10154 IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg));
10157 return IsNullOrVectorRegister &&
10159 (!
MI.isTerminator() &&
MI.getOpcode() != AMDGPU::COPY &&
10160 MI.modifiesRegister(AMDGPU::EXEC, &RI)));
10168 if (ST.hasAddNoCarryInsts())
10184 if (ST.hasAddNoCarryInsts())
10188 Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
10190 : RS.scavengeRegisterBackwards(
10191 *RI.getBoolRC(),
I,
false,
10204 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
10205 case AMDGPU::SI_KILL_I1_TERMINATOR:
10214 case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
10215 return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR);
10216 case AMDGPU::SI_KILL_I1_PSEUDO:
10217 return get(AMDGPU::SI_KILL_I1_TERMINATOR);
10229 const unsigned OffsetBits =
10231 return (1 << OffsetBits) - 1;
10235 if (!ST.isWave32())
10238 if (
MI.isInlineAsm())
10241 if (
MI.getNumOperands() <
MI.getNumExplicitOperands())
10244 for (
auto &
Op :
MI.implicit_operands()) {
10245 if (
Op.isReg() &&
Op.getReg() == AMDGPU::VCC)
10246 Op.setReg(AMDGPU::VCC_LO);
10255 int Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::sbase);
10259 const int16_t RCID = getOpRegClassID(
MI.getDesc().operands()[Idx]);
10260 return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
10276 if (Imm > MaxImm) {
10277 if (Imm <= MaxImm + 64) {
10279 Overflow = Imm - MaxImm;
10298 if (Overflow > 0) {
10306 if (ST.hasRestrictedSOffset())
10311 SOffset = Overflow;
10349 if (!ST.hasFlatInstOffsets())
10357 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10369std::pair<int64_t, int64_t>
10372 int64_t RemainderOffset = COffsetVal;
10373 int64_t ImmField = 0;
10378 if (AllowNegative) {
10380 int64_t
D = 1LL << NumBits;
10381 RemainderOffset = (COffsetVal /
D) *
D;
10382 ImmField = COffsetVal - RemainderOffset;
10384 if (ST.hasNegativeUnalignedScratchOffsetBug() &&
10386 (ImmField % 4) != 0) {
10388 RemainderOffset += ImmField % 4;
10389 ImmField -= ImmField % 4;
10391 }
else if (COffsetVal >= 0) {
10393 RemainderOffset = COffsetVal - ImmField;
10397 assert(RemainderOffset + ImmField == COffsetVal);
10398 return {ImmField, RemainderOffset};
10402 if (ST.hasNegativeScratchOffsetBug() &&
10410 switch (ST.getGeneration()) {
10439 case AMDGPU::V_MOVRELS_B32_dpp_gfx10:
10440 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
10441 case AMDGPU::V_MOVRELD_B32_dpp_gfx10:
10442 case AMDGPU::V_MOVRELD_B32_sdwa_gfx10:
10443 case AMDGPU::V_MOVRELSD_B32_dpp_gfx10:
10444 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
10445 case AMDGPU::V_MOVRELSD_2_B32_dpp_gfx10:
10446 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
10453#define GENERATE_RENAMED_GFX9_CASES(OPCODE) \
10454 case OPCODE##_dpp: \
10455 case OPCODE##_e32: \
10456 case OPCODE##_e64: \
10457 case OPCODE##_e64_dpp: \
10458 case OPCODE##_sdwa:
10472 case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
10473 case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
10474 case AMDGPU::V_FMA_F16_gfx9_e64:
10475 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
10476 case AMDGPU::V_INTERP_P2_F16:
10477 case AMDGPU::V_MAD_F16_e64:
10478 case AMDGPU::V_MAD_U16_e64:
10479 case AMDGPU::V_MAD_I16_e64:
10488 "SIInsertWaitcnts should have promoted soft waitcnt instructions!");
10502 switch (ST.getGeneration()) {
10515 if (
isMAI(Opcode)) {
10523 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX11_7Insts())
10526 if (MCOp == AMDGPU::INSTRUCTION_LIST_END && ST.hasGFX1250Insts())
10533 if (ST.hasGFX90AInsts()) {
10534 uint32_t NMCOp = AMDGPU::INSTRUCTION_LIST_END;
10535 if (ST.hasGFX940Insts())
10537 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10539 if (NMCOp == AMDGPU::INSTRUCTION_LIST_END)
10541 if (NMCOp != AMDGPU::INSTRUCTION_LIST_END)
10547 if (MCOp == AMDGPU::INSTRUCTION_LIST_END)
10566 for (
unsigned I = 0, E = (
MI.getNumOperands() - 1)/ 2;
I < E; ++
I)
10567 if (
MI.getOperand(1 + 2 *
I + 1).getImm() == SubReg) {
10568 auto &RegOp =
MI.getOperand(1 + 2 *
I);
10580 switch (
MI.getOpcode()) {
10582 case AMDGPU::REG_SEQUENCE:
10586 case AMDGPU::INSERT_SUBREG:
10587 if (RSR.
SubReg == (
unsigned)
MI.getOperand(3).getImm())
10604 if (!
P.Reg.isVirtual())
10609 while (
auto *
MI = DefInst) {
10611 switch (
MI->getOpcode()) {
10613 case AMDGPU::V_MOV_B32_e32: {
10614 auto &Op1 =
MI->getOperand(1);
10643 auto *DefBB =
DefMI.getParent();
10647 if (
UseMI.getParent() != DefBB)
10650 const int MaxInstScan = 20;
10654 auto E =
UseMI.getIterator();
10655 for (
auto I = std::next(
DefMI.getIterator());
I != E; ++
I) {
10656 if (
I->isDebugInstr())
10659 if (++NumInst > MaxInstScan)
10662 if (
I->modifiesRegister(AMDGPU::EXEC,
TRI))
10675 auto *DefBB =
DefMI.getParent();
10677 const int MaxUseScan = 10;
10681 auto &UseInst = *
Use.getParent();
10684 if (UseInst.getParent() != DefBB || UseInst.isPHI())
10687 if (++NumUse > MaxUseScan)
10694 const int MaxInstScan = 20;
10698 for (
auto I = std::next(
DefMI.getIterator()); ; ++
I) {
10701 if (
I->isDebugInstr())
10704 if (++NumInst > MaxInstScan)
10717 if (Reg == VReg && --NumUse == 0)
10719 }
else if (
TRI->regsOverlap(Reg, AMDGPU::EXEC))
10728 auto Cur =
MBB.begin();
10729 if (Cur !=
MBB.end())
10731 if (!Cur->isPHI() && Cur->readsRegister(Dst,
nullptr))
10734 }
while (Cur !=
MBB.end() && Cur != LastPHIIt);
10743 if (InsPt !=
MBB.end() &&
10744 (InsPt->getOpcode() == AMDGPU::SI_IF ||
10745 InsPt->getOpcode() == AMDGPU::SI_ELSE ||
10746 InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
10747 InsPt->definesRegister(Src,
nullptr)) {
10751 .
addReg(Src, {}, SrcSubReg)
10776 if (isFullCopyInstr(
MI)) {
10777 Register DstReg =
MI.getOperand(0).getReg();
10778 Register SrcReg =
MI.getOperand(1).getReg();
10800 unsigned *PredCost)
const {
10801 if (
MI.isBundle()) {
10804 unsigned Lat = 0,
Count = 0;
10805 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
10807 Lat = std::max(Lat, SchedModel.computeInstrLatency(&*
I));
10809 return Lat +
Count - 1;
10812 return SchedModel.computeInstrLatency(&
MI);
10819 return *CallAddrOp;
10826 unsigned Opcode =
MI.getOpcode();
10828 auto HandleAddrSpaceCast = [
this, &MRI](
const MachineInstr &
MI) {
10831 :
MI.getOperand(1).getReg();
10835 unsigned SrcAS = SrcTy.getAddressSpace();
10838 ST.hasGloballyAddressableScratch()
10846 if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
10847 return HandleAddrSpaceCast(
MI);
10850 auto IID = GI->getIntrinsicID();
10857 case Intrinsic::amdgcn_addrspacecast_nonnull:
10858 return HandleAddrSpaceCast(
MI);
10859 case Intrinsic::amdgcn_if:
10860 case Intrinsic::amdgcn_else:
10874 if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
10875 Opcode == AMDGPU::G_SEXTLOAD) {
10876 if (
MI.memoperands_empty())
10880 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10881 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10889 if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
10890 Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
10891 Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
10900 Formatter = std::make_unique<AMDGPUMIRFormatter>(ST);
10901 return Formatter.get();
10909 unsigned opcode =
MI.getOpcode();
10910 if (opcode == AMDGPU::V_READLANE_B32 ||
10911 opcode == AMDGPU::V_READFIRSTLANE_B32 ||
10912 opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
10915 if (isCopyInstr(
MI)) {
10919 RI.getPhysRegBaseClass(srcOp.
getReg());
10927 if (
MI.isPreISelOpcode())
10942 if (
MI.memoperands_empty())
10946 return mmo->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
10947 mmo->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
10962 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
10964 if (!
SrcOp.isReg())
10968 if (!Reg || !
SrcOp.readsReg())
10974 if (RegBank && RegBank->
getID() != AMDGPU::SGPRRegBankID)
11001 F,
"ds_ordered_count unsupported for this calling conv"));
11015 Register &SrcReg2, int64_t &CmpMask,
11016 int64_t &CmpValue)
const {
11017 if (!
MI.getOperand(0).isReg() ||
MI.getOperand(0).getSubReg())
11020 switch (
MI.getOpcode()) {
11023 case AMDGPU::S_CMP_EQ_U32:
11024 case AMDGPU::S_CMP_EQ_I32:
11025 case AMDGPU::S_CMP_LG_U32:
11026 case AMDGPU::S_CMP_LG_I32:
11027 case AMDGPU::S_CMP_LT_U32:
11028 case AMDGPU::S_CMP_LT_I32:
11029 case AMDGPU::S_CMP_GT_U32:
11030 case AMDGPU::S_CMP_GT_I32:
11031 case AMDGPU::S_CMP_LE_U32:
11032 case AMDGPU::S_CMP_LE_I32:
11033 case AMDGPU::S_CMP_GE_U32:
11034 case AMDGPU::S_CMP_GE_I32:
11035 case AMDGPU::S_CMP_EQ_U64:
11036 case AMDGPU::S_CMP_LG_U64:
11037 SrcReg =
MI.getOperand(0).getReg();
11038 if (
MI.getOperand(1).isReg()) {
11039 if (
MI.getOperand(1).getSubReg())
11041 SrcReg2 =
MI.getOperand(1).getReg();
11043 }
else if (
MI.getOperand(1).isImm()) {
11045 CmpValue =
MI.getOperand(1).getImm();
11051 case AMDGPU::S_CMPK_EQ_U32:
11052 case AMDGPU::S_CMPK_EQ_I32:
11053 case AMDGPU::S_CMPK_LG_U32:
11054 case AMDGPU::S_CMPK_LG_I32:
11055 case AMDGPU::S_CMPK_LT_U32:
11056 case AMDGPU::S_CMPK_LT_I32:
11057 case AMDGPU::S_CMPK_GT_U32:
11058 case AMDGPU::S_CMPK_GT_I32:
11059 case AMDGPU::S_CMPK_LE_U32:
11060 case AMDGPU::S_CMPK_LE_I32:
11061 case AMDGPU::S_CMPK_GE_U32:
11062 case AMDGPU::S_CMPK_GE_I32:
11063 SrcReg =
MI.getOperand(0).getReg();
11065 CmpValue =
MI.getOperand(1).getImm();
11075 if (S->isLiveIn(AMDGPU::SCC))
11084bool SIInstrInfo::invertSCCUse(
MachineInstr *SCCDef)
const {
11087 bool SCCIsDead =
false;
11090 constexpr unsigned ScanLimit = 12;
11091 unsigned Count = 0;
11092 for (MachineInstr &
MI :
11094 if (++
Count > ScanLimit)
11096 if (
MI.readsRegister(AMDGPU::SCC, &RI)) {
11097 if (
MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
11098 MI.getOpcode() == AMDGPU::S_CSELECT_B64 ||
11099 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11100 MI.getOpcode() == AMDGPU::S_CBRANCH_SCC1)
11105 if (
MI.definesRegister(AMDGPU::SCC, &RI)) {
11118 for (MachineInstr *
MI : InvertInstr) {
11119 if (
MI->getOpcode() == AMDGPU::S_CSELECT_B32 ||
11120 MI->getOpcode() == AMDGPU::S_CSELECT_B64) {
11122 }
else if (
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0 ||
11123 MI->getOpcode() == AMDGPU::S_CBRANCH_SCC1) {
11124 MI->setDesc(
get(
MI->getOpcode() == AMDGPU::S_CBRANCH_SCC0
11125 ? AMDGPU::S_CBRANCH_SCC1
11126 : AMDGPU::S_CBRANCH_SCC0));
11139 bool NeedInversion)
const {
11140 MachineInstr *KillsSCC =
nullptr;
11145 if (
MI.modifiesRegister(AMDGPU::SCC, &RI))
11147 if (
MI.killsRegister(AMDGPU::SCC, &RI))
11150 if (NeedInversion && !invertSCCUse(SCCRedefine))
11152 if (MachineOperand *SccDef =
11154 SccDef->setIsDead(
false);
11162 if (Def.getOpcode() != AMDGPU::S_CSELECT_B32 &&
11163 Def.getOpcode() != AMDGPU::S_CSELECT_B64)
11165 bool Op1IsNonZeroImm =
11166 Def.getOperand(1).isImm() && Def.getOperand(1).getImm() != 0;
11167 bool Op2IsZeroImm =
11168 Def.getOperand(2).isImm() && Def.getOperand(2).getImm() == 0;
11169 if (!Op1IsNonZeroImm || !Op2IsZeroImm)
11175 unsigned &NewDefOpc) {
11178 if (Def.getOpcode() != AMDGPU::S_ADD_I32 &&
11179 Def.getOpcode() != AMDGPU::S_ADD_U32)
11185 if ((!AddSrc1.
isImm() || AddSrc1.
getImm() != 1) &&
11191 if (Def.getOpcode() == AMDGPU::S_ADD_I32) {
11193 Def.findRegisterDefOperand(AMDGPU::SCC,
nullptr);
11196 NewDefOpc = AMDGPU::S_ADD_U32;
11198 NeedInversion = !NeedInversion;
11203 Register SrcReg2, int64_t CmpMask,
11212 const auto optimizeCmpSelect = [&CmpInstr, SrcReg, CmpValue, MRI,
11213 this](
bool NeedInversion) ->
bool {
11237 unsigned NewDefOpc = Def->getOpcode();
11243 if (!optimizeSCC(Def, &CmpInstr, NeedInversion))
11246 if (NewDefOpc != Def->getOpcode())
11247 Def->setDesc(
get(NewDefOpc));
11256 if (Def->getOpcode() == AMDGPU::S_OR_B32 &&
11263 if (Def1 && Def1->
getOpcode() == AMDGPU::COPY && Def2 &&
11271 optimizeSCC(
Select, Def,
false);
11278 const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
11279 this](int64_t ExpectedValue,
unsigned SrcSize,
11280 bool IsReversible,
bool IsSigned) ->
bool {
11308 if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
11309 Def->getOpcode() != AMDGPU::S_AND_B64)
11313 const auto isMask = [&Mask, SrcSize](
const MachineOperand *MO) ->
bool {
11324 SrcOp = &Def->getOperand(2);
11325 else if (isMask(&Def->getOperand(2)))
11326 SrcOp = &Def->getOperand(1);
11334 if (IsSigned && BitNo == SrcSize - 1)
11337 ExpectedValue <<= BitNo;
11339 bool IsReversedCC =
false;
11340 if (CmpValue != ExpectedValue) {
11343 IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
11348 Register DefReg = Def->getOperand(0).getReg();
11352 if (!optimizeSCC(Def, &CmpInstr,
false))
11363 unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
11364 : AMDGPU::S_BITCMP1_B32
11365 : IsReversedCC ? AMDGPU::S_BITCMP0_B64
11366 : AMDGPU::S_BITCMP1_B64;
11371 Def->eraseFromParent();
11379 case AMDGPU::S_CMP_EQ_U32:
11380 case AMDGPU::S_CMP_EQ_I32:
11381 case AMDGPU::S_CMPK_EQ_U32:
11382 case AMDGPU::S_CMPK_EQ_I32:
11383 return optimizeCmpAnd(1, 32,
true,
false) ||
11384 optimizeCmpSelect(
true);
11385 case AMDGPU::S_CMP_GE_U32:
11386 case AMDGPU::S_CMPK_GE_U32:
11387 return optimizeCmpAnd(1, 32,
false,
false);
11388 case AMDGPU::S_CMP_GE_I32:
11389 case AMDGPU::S_CMPK_GE_I32:
11390 return optimizeCmpAnd(1, 32,
false,
true);
11391 case AMDGPU::S_CMP_EQ_U64:
11392 return optimizeCmpAnd(1, 64,
true,
false);
11393 case AMDGPU::S_CMP_LG_U32:
11394 case AMDGPU::S_CMP_LG_I32:
11395 case AMDGPU::S_CMPK_LG_U32:
11396 case AMDGPU::S_CMPK_LG_I32:
11397 return optimizeCmpAnd(0, 32,
true,
false) ||
11398 optimizeCmpSelect(
false);
11399 case AMDGPU::S_CMP_GT_U32:
11400 case AMDGPU::S_CMPK_GT_U32:
11401 return optimizeCmpAnd(0, 32,
false,
false);
11402 case AMDGPU::S_CMP_GT_I32:
11403 case AMDGPU::S_CMPK_GT_I32:
11404 return optimizeCmpAnd(0, 32,
false,
true);
11405 case AMDGPU::S_CMP_LG_U64:
11406 return optimizeCmpAnd(0, 64,
true,
false) ||
11407 optimizeCmpSelect(
false);
11414 AMDGPU::OpName
OpName)
const {
11415 if (!ST.needsAlignedVGPRs())
11418 int OpNo = AMDGPU::getNamedOperandIdx(
MI.getOpcode(),
OpName);
11430 bool IsAGPR = RI.isAGPR(MRI, DataReg);
11432 IsAGPR ? &AMDGPU::AGPR_32RegClass : &AMDGPU::VGPR_32RegClass);
11436 : &AMDGPU::VReg_64_Align2RegClass);
11438 .
addReg(DataReg, {},
Op.getSubReg())
11443 Op.setSubReg(AMDGPU::sub0);
11458 if (ST.hasGFX1250Insts())
11465 unsigned Opcode =
MI.getOpcode();
11471 Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
11472 Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
11475 if (!ST.hasGFX940Insts())
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isUndef(const MachineInstr &MI)
TargetInstrInfo::RegSubRegPair RegSubRegPair
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
static cl::opt< bool > Fix16BitCopies("amdgpu-fix-16-bit-physreg-copies", cl::desc("Fix copies between 32 and 16 bit registers by extending to 32 bit"), cl::init(true), cl::ReallyHidden)
static void emitLoadScalarOpsFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={})
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const TargetRegisterClass *RC, bool Forward)
static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc)
static void indirectCopyToAGPR(const SIInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, RegScavenger &RS, bool RegsOverlap, Register ImpDefSuperReg=Register(), Register ImpUseSuperReg=Register())
Handle copying from SGPR to AGPR, or from AGPR to AGPR on GFX908.
static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize)
static bool compareMachineOp(const MachineOperand &Op0, const MachineOperand &Op1)
static bool isStride64(unsigned Opc)
static MachineBasicBlock * generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, ArrayRef< MachineOperand * > ScalarOps, MachineDominatorTree *MDT, MachineBasicBlock::iterator Begin=nullptr, MachineBasicBlock::iterator End=nullptr, ArrayRef< Register > PhySGPRs={})
#define GENERATE_RENAMED_GFX9_CASES(OPCODE)
static std::tuple< unsigned, unsigned > extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc)
static bool followSubRegDef(MachineInstr &MI, TargetInstrInfo::RegSubRegPair &RSR)
static unsigned getIndirectSGPRWriteMovRelPseudo64(unsigned VecSize)
static MachineInstr * swapImmOperands(MachineInstr &MI, MachineOperand &NonRegOp1, MachineOperand &NonRegOp2)
static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig)
static bool offsetsDoNotOverlap(LocationSize WidthA, int OffsetA, LocationSize WidthB, int OffsetB)
static unsigned getWWMRegSpillSaveOpcode(unsigned Size, bool IsVectorSuperClass)
static bool memOpsHaveSameBaseOperands(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getWWMRegSpillRestoreOpcode(unsigned Size, bool IsVectorSuperClass)
static bool setsSCCIfResultIsZero(const MachineInstr &Def, bool &NeedInversion, unsigned &NewDefOpc)
static bool isSCCDeadOnExit(MachineBasicBlock *MBB)
static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI, int64_t &Imm, MachineInstr **DefMI=nullptr)
static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize)
static unsigned subtargetEncodingFamily(const GCNSubtarget &ST)
static void preserveCondRegFlags(MachineOperand &CondReg, const MachineOperand &OrigCond)
static Register findImplicitSGPRRead(const MachineInstr &MI)
static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc)
static cl::opt< unsigned > BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16), cl::desc("Restrict range of branch instructions (DEBUG)"))
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI, MachineInstr &NewMI)
static bool memOpsHaveSameBasePtr(const MachineInstr &MI1, ArrayRef< const MachineOperand * > BaseOps1, const MachineInstr &MI2, ArrayRef< const MachineOperand * > BaseOps2)
static unsigned getSGPRSpillRestoreOpcode(unsigned Size)
static bool isRegOrFI(const MachineOperand &MO)
static unsigned getSGPRSpillSaveOpcode(unsigned Size)
static constexpr AMDGPU::OpName ModifierOpNames[]
static unsigned getVGPRSpillSaveOpcode(unsigned Size)
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, const char *Msg="illegal VGPR to SGPR copy")
static MachineInstr * swapRegAndNonRegOperand(MachineInstr &MI, MachineOperand &RegOp, MachineOperand &NonRegOp)
static bool shouldReadExec(const MachineInstr &MI)
static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc)
static bool isRenamedInGFX9(int Opcode)
static TargetInstrInfo::RegSubRegPair getRegOrUndef(const MachineOperand &RegOpnd)
static bool changesVGPRIndexingMode(const MachineInstr &MI)
static bool isSubRegOf(const SIRegisterInfo &TRI, const MachineOperand &SuperVec, const MachineOperand &SubReg)
static bool foldableSelect(const MachineInstr &Def)
static bool nodesHaveSameOperandValue(SDNode *N0, SDNode *N1, AMDGPU::OpName OpName)
Returns true if both nodes have the same value for the given operand Op, or if both nodes do not have...
static unsigned getAVSpillSaveOpcode(unsigned Size)
static unsigned getNumOperandsNoGlue(SDNode *Node)
static bool canRemat(const MachineInstr &MI)
static unsigned getAVSpillRestoreOpcode(unsigned Size)
static unsigned getVGPRSpillRestoreOpcode(unsigned Size)
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
const unsigned CSelectOpc
static const LaneMaskConstants & get(const GCNSubtarget &ST)
const unsigned XorTermOpc
const unsigned OrSaveExecOpc
const unsigned AndSaveExecOpc
static LLVM_ABI Semantics SemanticsToEnum(const llvm::fltSemantics &Sem)
Class for arbitrary precision integers.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
uint64_t getZExtValue() const
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Diagnostic information for unsupported feature in backend.
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
CycleT * getCycle(const BlockT *Block) const
Find the innermost cycle containing a given block.
void getExitingBlocks(SmallVectorImpl< BlockT * > &TmpStorage) const
Return all blocks of this cycle that have successor outside of this cycle.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Itinerary data supplied by a subtarget to be used by a target.
constexpr unsigned getAddressSpace() const
This is an important class for using LLVM in a threaded context.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasInterval(Register Reg) const
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveInterval & getInterval(Register Reg)
LLVM_ABI bool shrinkToUses(LiveInterval *li, SmallVectorImpl< MachineInstr * > *dead=nullptr)
After removing some uses of a register, shrink its live range to just the remaining uses.
SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
This class represents the liveness of a register, stack slot, etc.
LLVM_ABI void replaceKillInstruction(Register Reg, MachineInstr &OldMI, MachineInstr &NewMI)
replaceKillInstruction - Update register kill info by replacing a kill instruction with a new one.
LLVM_ABI VarInfo & getVarInfo(Register Reg)
getVarInfo - Return the VarInfo structure for the specified VIRTUAL register.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createAShr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
unsigned getSize() const
Return the number of bytes in the encoding of this instruction, or zero if the encoding size cannot b...
ArrayRef< MCPhysReg > implicit_uses() const
Return a list of registers that are potentially read by any instance of this machine instruction.
unsigned getOpcode() const
Return the opcode number for this descriptor.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
int16_t RegClass
This specifies the register class enumeration of the operand if the operand is a register.
Wrapper class representing physical registers. Should be passed by value.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
LLVM_ABI void setVariableValue(const MCExpr *Value)
Helper class for constructing bundles of MachineInstrs.
MachineBasicBlock::instr_iterator begin() const
Return an iterator to the first bundled instruction.
MIBundleBuilder & append(MachineInstr *MI)
Insert MI into MBB by appending it to the instructions in the bundle.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineInstrBundleIterator< MachineInstr, true > reverse_iterator
Instructions::const_iterator const_instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
@ LQR_Dead
Register is known to be fully dead.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI void addImplicitDefUseOperands(MachineFunction &MF)
Add all implicit def and use operands to this instruction.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
void untieRegOperand(unsigned OpIdx)
Break any tie involving OpIdx.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void eraseFromBundle()
Unlink 'this' from its basic block and delete it.
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
mop_range explicit_operands()
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol)
Set a symbol that will be emitted just after the instruction itself.
LLVM_ABI void clearRegisterKills(Register Reg, const TargetRegisterInfo *RegInfo)
Clear all kill flags affecting Reg.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
const GlobalValue * getGlobal() const
void setImplicit(bool Val=true)
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isTargetIndex() const
isTargetIndex - Tests if this is a MO_TargetIndex operand.
void setTargetFlags(unsigned F)
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_Register
Register operand.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool isFPImm() const
isFPImm - Tests if this is a MO_FPImmediate operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI void moveOperands(MachineOperand *Dst, MachineOperand *Src, unsigned NumOps)
Move NumOps operands from Src to Dst, updating use-def lists as needed.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
LLVM_ABI void clearVirtRegs()
clearVirtRegs - Remove all virtual registers (after physreg assignment).
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
void setSimpleHint(Register VReg, Register PrefReg)
Specify the preferred (target independent) register allocation hint for the specified virtual registe...
const TargetRegisterInfo * getTargetRegisterInfo() const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
iterator_range< use_iterator > use_operands(Register Reg) const
LLVM_ABI void removeRegOperandFromUseList(MachineOperand *MO)
Remove MO from its use-def list.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI void addRegOperandToUseList(MachineOperand *MO)
Add MO to the linked list of operands for its register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isLegalMUBUFImmOffset(unsigned Imm) const
bool isInlineConstant(const APInt &Imm) const
void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const
Fix operands in MI to satisfy constant bus requirements.
bool canAddToBBProlog(const MachineInstr &MI) const
static bool isDS(const MachineInstr &MI)
MachineBasicBlock * legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT=nullptr) const
Legalize all operands in this instruction.
bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0, int64_t &Offset1) const override
unsigned getLiveRangeSplitOpcode(Register Reg, const MachineFunction &MF) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const final
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
static bool isNeverUniform(const MachineInstr &MI)
bool isXDLWMMA(const MachineInstr &MI) const
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineInstr *&CopyMI, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
bool isBasicBlockPrologue(const MachineInstr &MI, Register Reg=Register()) const override
bool isSpill(uint32_t Opcode) const
uint64_t getDefaultRsrcDataFormat() const
static bool isSOPP(const MachineInstr &MI)
bool mayAccessScratch(const MachineInstr &MI) const
bool isIGLP(unsigned Opcode) const
static bool isFLATScratch(const MachineInstr &MI)
const MCInstrDesc & getIndirectRegWriteMovRelPseudo(unsigned VecSize, unsigned EltSize, bool IsSGPR) const
MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg) const
Return a partially built integer add instruction without carry.
bool mayAccessFlatAddressSpace(const MachineInstr &MI) const
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0, int64_t Offset1, unsigned NumLoads) const override
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const
Replace the instructions opcode with the equivalent VALU opcode.
static bool isSMRD(const MachineInstr &MI)
void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, SlotIndexes *Indexes=nullptr) const
bool usesConstantBus(const MachineRegisterInfo &MRI, const MachineOperand &MO, const MCOperandInfo &OpInfo) const
Returns true if this operand uses the constant bus.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getFoldableCopySrcIdx(const MachineInstr &MI)
unsigned getOpSize(uint32_t Opcode, unsigned OpNo) const
Return the size in bytes of the operand OpNo on the given.
void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isMTBUF(const MachineInstr &MI)
const MCInstrDesc & getIndirectGPRIDXPseudo(unsigned VecSize, bool IsIndirectSrc) const
void insertReturn(MachineBasicBlock &MBB) const
static bool isDGEMM(unsigned Opcode)
static bool isEXP(const MachineInstr &MI)
static bool isSALU(const MachineInstr &MI)
static bool setsSCCIfResultIsNonZero(const MachineInstr &MI)
const MIRFormatter * getMIRFormatter() const override
void legalizeGenericOperand(MachineBasicBlock &InsertMBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DstRC, MachineOperand &Op, MachineRegisterInfo &MRI, const DebugLoc &DL) const
MachineInstr * buildShrunkInst(MachineInstr &MI, unsigned NewOpcode) const
unsigned getInstBundleSize(const MachineInstr &MI) const
static bool isVOP2(const MachineInstr &MI)
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
static bool isSDWA(const MachineInstr &MI)
const MCInstrDesc & getKillTerminatorFromPseudo(unsigned Opcode) const
void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Quantity) const override
static bool isGather4(const MachineInstr &MI)
MachineInstr * getWholeWaveFunctionSetup(MachineFunction &MF) const
bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO would be a valid operand for the given operand definition OpInfo.
static bool isDOT(const MachineInstr &MI)
MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const override
bool hasModifiers(unsigned Opcode) const
Return true if this instruction has any modifiers.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
static bool isSWMMAC(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *II, const ScheduleDAGMI *DAG) const override
bool isHighLatencyDef(int Opc) const override
void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const
Legalize the OpIndex operand of this instruction by inserting a MOV.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isVOPC(const MachineInstr &MI)
void removeModOperands(MachineInstr &MI) const
std::pair< int64_t, int64_t > splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace, uint64_t FlatVariant) const
Split COffsetVal into {immediate offset field, remainder offset} values.
unsigned getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
bool isXDL(const MachineInstr &MI) const
Register isStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
static bool isVIMAGE(const MachineInstr &MI)
void enforceOperandRCAlignment(MachineInstr &MI, AMDGPU::OpName OpName) const
static bool isSOP2(const MachineInstr &MI)
static bool isGWS(const MachineInstr &MI)
bool isLegalAV64PseudoImm(uint64_t Imm) const
Check if this immediate value can be used for AV_MOV_B64_IMM_PSEUDO.
bool isNeverCoissue(MachineInstr &MI) const
static bool isBUF(const MachineInstr &MI)
void handleCopyToPhysHelper(SIInstrWorklist &Worklist, Register DstReg, MachineInstr &Inst, MachineRegisterInfo &MRI, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
bool hasModifiersSet(const MachineInstr &MI, AMDGPU::OpName OpName) const
const TargetRegisterClass * getPreferredSelectRegClass(unsigned Size) const
bool isLegalToSwap(const MachineInstr &MI, unsigned fromIdx, unsigned toIdx) const
static bool isFLATGlobal(const MachineInstr &MI)
bool isGlobalMemoryObject(const MachineInstr *MI) const override
static bool isVSAMPLE(const MachineInstr &MI)
bool isBufferSMRD(const MachineInstr &MI) const
static bool isKillTerminator(unsigned Opcode)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Reg, bool IsSCCLive, SlotIndexes *Indexes=nullptr) const
bool hasVALU32BitEncoding(unsigned Opcode) const
Return true if this 64-bit VALU instruction has a 32-bit encoding.
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const
Register isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex, TypeSize &MemBytes) const
unsigned buildExtractSubReg(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const
Legalize operands in MI by either commuting it or inserting a copy of src1.
bool foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const final
static bool isTRANS(const MachineInstr &MI)
static bool isImage(const MachineInstr &MI)
static bool isSOPK(const MachineInstr &MI)
const TargetRegisterClass * getOpRegClass(const MachineInstr &MI, unsigned OpNo) const
Return the correct register class for OpNo.
MachineBasicBlock * insertSimulatedTrap(MachineRegisterInfo &MRI, MachineBasicBlock &MBB, MachineInstr &MI, const DebugLoc &DL) const
Build instructions that simulate the behavior of a s_trap 2 instructions for hardware (namely,...
static unsigned getNonSoftWaitcntOpcode(unsigned Opcode)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static bool isFoldableCopy(const MachineInstr &MI)
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const
bool isIgnorableUse(const MachineOperand &MO) const override
static bool isMUBUF(const MachineInstr &MI)
bool expandPostRAPseudo(MachineInstr &MI) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
void createWaterFallForSiCall(MachineInstr *MI, MachineDominatorTree *MDT, ArrayRef< MachineOperand * > ScalarOps, ArrayRef< Register > PhySGPRs={}) const
Wrapper function for generating waterfall for instruction MI. This function takes into consideration of...
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
bool isReMaterializableImpl(const MachineInstr &MI) const override
static bool isVOP3(const MCInstrDesc &Desc)
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool physRegUsesConstantBus(const MachineOperand &Reg) const
static bool isF16PseudoScalarTrans(unsigned Opcode)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const
static bool isDPP(const MachineInstr &MI)
bool analyzeBranchImpl(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const
static bool isMFMA(const MachineInstr &MI)
bool isLowLatencyInstruction(const MachineInstr &MI) const
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to ano...
void mutateAndCleanupImplicit(MachineInstr &MI, const MCInstrDesc &NewDesc) const
ValueUniformity getGenericValueUniformity(const MachineInstr &MI) const
static bool isMAI(const MCInstrDesc &Desc)
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const override
static bool usesLGKM_CNT(const MachineInstr &MI)
void legalizeOperandsVALUt16(MachineInstr &Inst, MachineRegisterInfo &MRI) const
Fix operands in Inst to fix 16bit SALU to VALU lowering.
bool isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, const MachineOperand &MO) const
bool canShrink(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
const MachineOperand & getCalleeOperand(const MachineInstr &MI) const override
bool isAsmOnlyOpcode(int MCOp) const
Check if this instruction should only be used by assembler.
bool isAlwaysGDS(uint32_t Opcode) const
static bool isVGPRSpill(const MachineInstr &MI)
ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override
This is used by the post-RA scheduler (SchedulePostRAList.cpp).
bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override
bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace, uint64_t FlatVariant) const
Returns if Offset is legal for the subtarget as the offset to a FLAT encoded instruction with the giv...
unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, unsigned *PredCost=nullptr) const override
int64_t getNamedImmOperand(const MachineInstr &MI, AMDGPU::OpName OperandName) const
Get required immediate operand.
ArrayRef< std::pair< int, const char * > > getSerializableTargetIndices() const override
bool regUsesConstantBus(const MachineOperand &Reg, const MachineRegisterInfo &MRI) const
static bool isMIMG(const MachineInstr &MI)
MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI, const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC, unsigned SubIdx, const TargetRegisterClass *SubRC) const
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
bool isLegalRegOperand(const MachineRegisterInfo &MRI, const MCOperandInfo &OpInfo, const MachineOperand &MO) const
Check if MO (a register operand) is a legal register for the given operand description or operand ind...
bool allowNegativeFlatOffset(uint64_t FlatVariant) const
Returns true if negative offsets are allowed for the given FlatVariant.
static unsigned getNumWaitStates(const MachineInstr &MI)
Return the number of wait states that result from executing this instruction.
unsigned getVectorRegSpillSaveOpcode(Register Reg, const TargetRegisterClass *RC, unsigned Size, const SIMachineFunctionInfo &MFI) const
unsigned getVALUOp(const MachineInstr &MI) const
static bool modifiesModeRegister(const MachineInstr &MI)
Return true if the instruction modifies the mode register.
Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI, MachineRegisterInfo &MRI, const TargetRegisterClass *DstRC=nullptr) const
Copy a value from a VGPR (SrcReg) to SGPR.
bool hasDivergentBranch(const MachineBasicBlock *MBB) const
Return whether the block terminate with divergent branch.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
void fixImplicitOperands(MachineInstr &MI) const
bool moveFlatAddrToVGPR(MachineInstr &Inst) const
Change SADDR form of a FLAT Inst to its VADDR form if saddr operand was moved to VGPR.
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
void createReadFirstLaneFromCopyToPhysReg(MachineRegisterInfo &MRI, Register DstReg, MachineInstr &Inst) const
bool swapSourceModifiers(MachineInstr &MI, MachineOperand &Src0, AMDGPU::OpName Src0OpName, MachineOperand &Src1, AMDGPU::OpName Src1OpName) const
Register insertNE(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
This function is used to determine if an instruction can be safely executed under EXEC = 0 without ha...
bool getConstValDefinedInReg(const MachineInstr &MI, const Register Reg, int64_t &ImmVal) const override
static bool isAtomic(const MachineInstr &MI)
bool canInsertSelect(const MachineBasicBlock &MBB, ArrayRef< MachineOperand > Cond, Register DstReg, Register TrueReg, Register FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const override
bool isLiteralOperandLegal(const MCInstrDesc &InstDesc, const MCOperandInfo &OpInfo) const
static bool isWWMRegSpillOpcode(uint32_t Opcode)
static bool sopkIsZext(unsigned Opcode)
static bool isSGPRSpill(const MachineInstr &MI)
static bool isWMMA(const MachineInstr &MI)
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
MachineInstr * convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override
bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
Returns true if the instruction could potentially depend on the value of exec.
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
void insertVectorSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
std::pair< MachineInstr *, MachineInstr * > expandMovDPP64(MachineInstr &MI) const
Register insertEQ(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SrcReg, int Value) const
static bool isSOPC(const MachineInstr &MI)
static bool isFLAT(const MachineInstr &MI)
static bool isVALU(const MachineInstr &MI)
bool isBarrier(unsigned Opcode) const
MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx0, unsigned OpIdx1) const override
int pseudoToMCOpcode(int Opcode) const
Return a target-specific opcode if Opcode is a pseudo instruction.
const MCInstrDesc & getMCOpcodeFromPseudo(unsigned Opcode) const
Return the descriptor of the target-specific machine instruction that corresponds to the specified ps...
bool isLegalGFX12PlusPackedMathFP32Operand(const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN, const MachineOperand *MO=nullptr) const
Check if MO would be a legal operand for gfx12+ packed math FP32 instructions.
static bool usesVM_CNT(const MachineInstr &MI)
MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const override
static bool isFixedSize(const MachineInstr &MI)
bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo, MachineCycleInfo *CI) const override
LLVM_READONLY int commuteOpcode(unsigned Opc) const
ValueUniformity getValueUniformity(const MachineInstr &MI) const final
uint64_t getScratchRsrcWords23() const
LLVM_READONLY MachineOperand * getNamedOperand(MachineInstr &MI, AMDGPU::OpName OperandName) const
Returns the operand named Op.
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO=nullptr) const
Check if MO is a legal operand if it was the OpIdx Operand for MI.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
std::optional< int64_t > getImmOrMaterializedImm(MachineOperand &Op) const
void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT, MachineInstr &Inst, DenseMap< MachineInstr *, V2PhysSCopyInfo > &WaterFalls, DenseMap< MachineInstr *, bool > &V2SPhyCopiesToErase) const
static bool isLDSDMA(const MachineInstr &MI)
static bool isVOP1(const MachineInstr &MI)
SIInstrInfo(const GCNSubtarget &ST)
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
bool hasAnyModifiersSet(const MachineInstr &MI) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getLongBranchReservedReg() const
bool isWholeWaveFunction() const
Register getStackPtrOffsetReg() const
unsigned getMaxMemoryClusterDWords() const
void setHasSpilledVGPRs(bool Spill=true)
bool isWWMReg(Register Reg) const
bool checkFlag(Register Reg, uint8_t Flag) const
void setHasSpilledSGPRs(bool Spill=true)
unsigned getScratchReservedForDynamicVGPRs() const
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
unsigned getHWRegIndex(MCRegister Reg) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
unsigned getChannelFromSubReg(unsigned SubReg) const
static bool isAGPRClass(const TargetRegisterClass *RC)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAGMI *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual MachineInstr * createPHIDestinationCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual bool isReMaterializableImpl(const MachineInstr &MI) const
For instructions with opcodes for which the M_REMATERIALIZABLE flag is set, this hook lets the target...
virtual const MachineOperand & getCalleeOperand(const MachineInstr &MI) const
Returns the callee operand from the given MI.
virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register DestReg, unsigned SubIdx, const MachineInstr &Orig, LaneBitmask UsedLanes=LaneBitmask::getAll()) const
Re-issue the specified 'original' instruction at the specific location targeting a new destination re...
virtual MachineInstr * createPHISourceCopy(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt, const DebugLoc &DL, Register Src, unsigned SrcSubReg, Register Dst) const
During PHI elimination lets the target make necessary checks and insert the copy to the PHI destinati...
virtual MachineInstr * commuteInstructionImpl(MachineInstr &MI, bool NewMI, unsigned OpIdx1, unsigned OpIdx2) const
This method commutes the operands of the given machine instruction MI.
virtual bool isGlobalMemoryObject(const MachineInstr *MI) const
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
virtual bool expandPostRAPseudo(MachineInstr &MI) const
This function is called for all pseudo instructions that remain after register allocation.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
const uint64_t RSRC_DATA_FORMAT
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool getWMMAIsXDL(unsigned Opc)
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
LLVM_READONLY int32_t getCommuteRev(uint32_t Opcode)
LLVM_READONLY int32_t getCommuteOrig(uint32_t Opcode)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
LLVM_READONLY int32_t getGlobalVaddrOp(uint32_t Opcode)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
bool getMAIIsGFX940XDL(unsigned Opc)
const uint64_t RSRC_ELEMENT_SIZE_SHIFT
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getIfAddr64Inst(uint32_t Opcode)
Check if Opcode is an Addr64 opcode.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const uint64_t RSRC_TID_ENABLE
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
bool isGenericAtomic(unsigned Opc)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
LLVM_READONLY int32_t getAddr64Inst(uint32_t Opcode)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_BF16
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
@ OPERAND_REG_IMM_V2FP16_SPLAT
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_IMM_V2INT32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_INLINE_C_AV64_PSEUDO
@ OPERAND_REG_INLINE_AC_FP64
@ OPERAND_REG_INLINE_C_FP16
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
LLVM_READONLY int32_t getBasicFromSDWAOp(uint32_t Opcode)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
const uint64_t RSRC_INDEX_STRIDE_SHIFT
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
LLVM_READONLY int32_t getFlatScratchInstSVfromSS(uint32_t Opcode)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tessellation and geometry are not in use, or otherwise copy shader if geometry is in use).
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions starting at FirstMI and ending right before LastMI.
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
RegState
Flags to represent properties of register accesses.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ Define
Register definition.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
constexpr RegState getKillRegState(bool B)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI, unsigned SubReg)
Return the SubReg component from REG_SEQUENCE.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the start of the function to the load.
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
MachineInstr * getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P, const MachineRegisterInfo &MRI)
Return the defining instruction for a given reg:subreg pair skipping copy like instructions and subreg-manipulation pseudos.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
static const MachineMemOperand::Flags MOCooperative
Mark the MMO of cooperative load/store atomics.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
bool isTargetSpecificOpcode(unsigned Opcode)
Check whether the given Opcode is a target-specific opcode.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned DefaultMemoryClusterDWordsLimit
constexpr unsigned BitWidth
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
constexpr RegState getUndefRegState(bool B)
ValueUniformity
Enum describing how values behave with respect to uniformity and divergence, to answer the question: ...
@ AlwaysUniform
The result value is always uniform.
@ NeverUniform
The result value can never be assumed to be uniform.
@ Default
The result value is uniform if and only if all operands are uniform.
MachineCycleInfo::CycleT MachineCycle
static const MachineMemOperand::Flags MOThreadPrivate
Mark the MMO of accesses to memory locations that are never written to by other threads.
bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI)
Return false if EXEC is not changed between the def of VReg at DefMI and all its uses.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Helper struct for the implementation of 3-address conversion to communicate updates made to instructi...
MachineInstr * RemoveMIUse
Other instruction whose def is no longer used by the converted instruction.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
constexpr bool all() const
SparseBitVector AliveBlocks
AliveBlocks - Set of blocks in which this value is alive completely through.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Utility to store machine instructions worklist.
MachineInstr * top() const
bool isDeferred(MachineInstr *MI)
SetVector< MachineInstr * > & getDeferredList()
void insert(MachineInstr *MI)
A pair composed of a register and a sub-register index.
VirtRegInfo - Information about a virtual register used by a set of operands.
bool Reads
Reads - One of the operands read the virtual register.
bool Writes
Writes - One of the operands writes the virtual register.