84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
108 : RBI(RBI_),
MRI(MRI_), NewBank(RB) {}
110 ~ApplyRegBankMapping() {
117 const unsigned Opc =
MI.getOpcode();
118 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
119 Opc == AMDGPU::G_SEXT) {
126 if (SrcBank == &AMDGPU::VCCRegBank) {
130 assert(NewBank == &AMDGPU::VGPRRegBank);
135 auto True =
B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
136 auto False =
B.buildConstant(S32, 0);
137 B.buildSelect(DstReg, SrcReg, True, False);
138 MRI.setRegBank(True.getReg(0), *NewBank);
139 MRI.setRegBank(False.getReg(0), *NewBank);
140 MI.eraseFromParent();
143 assert(!
MRI.getRegClassOrRegBank(DstReg));
144 MRI.setRegBank(DstReg, *NewBank);
149 if (Opc == AMDGPU::G_TRUNC) {
152 assert(DstBank != &AMDGPU::VCCRegBank);
162 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
167 assert(NewBank == &AMDGPU::VGPRRegBank &&
168 "s1 operands should only be used for vector bools");
169 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
170 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
171 "not expecting legalization artifacts here");
172 RB = &AMDGPU::VCCRegBank;
175 MRI.setRegBank(Reg, *RB);
197 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
198 TII(Subtarget.getInstrInfo()) {
203 static auto InitializeRegisterBankOnce = [
this]() {
205 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
206 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
210 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
214 unsigned BankID = Bank.
getID();
215 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
220 unsigned Size)
const {
222 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
224 return std::numeric_limits<unsigned>::max();
235 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
237 Src.getID() == AMDGPU::SGPRRegBankID ||
238 Src.getID() == AMDGPU::VCCRegBankID))
239 return std::numeric_limits<unsigned>::max();
242 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
243 Src.getID() == AMDGPU::AGPRRegBankID)
277 if (&RC == &AMDGPU::SReg_1RegClass)
278 return AMDGPU::VCCRegBank;
287 return AMDGPU::SGPRRegBank;
289 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
292 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
295template <
unsigned NumOps>
299 const std::array<unsigned, NumOps> RegSrcOpIdx,
306 unsigned Sizes[NumOps];
307 for (
unsigned I = 0;
I < NumOps; ++
I) {
308 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
312 for (
unsigned I = 0,
E =
MI.getNumExplicitDefs();
I !=
E; ++
I) {
314 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
318 unsigned MappingID = 2;
319 for (
const auto &Entry : Table) {
320 for (
unsigned I = 0;
I < NumOps; ++
I) {
321 int OpIdx = RegSrcOpIdx[
I];
322 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I], Sizes[
I]);
336 switch (
MI.getIntrinsicID()) {
337 case Intrinsic::amdgcn_readlane: {
340 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
343 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
346 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
347 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
349 case Intrinsic::amdgcn_writelane: {
352 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
355 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
358 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
361 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
365 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
366 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
377 switch (
MI.getIntrinsicID()) {
378 case Intrinsic::amdgcn_s_buffer_load: {
381 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
384 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
387 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
390 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
394 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
395 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
397 case Intrinsic::amdgcn_ds_ordered_add:
398 case Intrinsic::amdgcn_ds_ordered_swap: {
402 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
405 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
408 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
409 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
411 case Intrinsic::amdgcn_s_sendmsg:
412 case Intrinsic::amdgcn_s_sendmsghalt: {
416 { { AMDGPU::SGPRRegBankID }, 1 },
419 { { AMDGPU::VGPRRegBankID }, 3 }
422 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
423 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
433 if (!
MI.hasOneMemOperand())
461 switch (
MI.getOpcode()) {
462 case TargetOpcode::G_CONSTANT:
463 case TargetOpcode::G_IMPLICIT_DEF: {
467 { { AMDGPU::VGPRRegBankID }, 1 },
468 { { AMDGPU::SGPRRegBankID }, 1 },
469 { { AMDGPU::VCCRegBankID }, 1 }
472 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
477 case TargetOpcode::G_FCONSTANT:
478 case TargetOpcode::G_FRAME_INDEX:
479 case TargetOpcode::G_GLOBAL_VALUE: {
481 { { AMDGPU::VGPRRegBankID }, 1 },
482 { { AMDGPU::SGPRRegBankID }, 1 }
485 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
487 case TargetOpcode::G_AND:
488 case TargetOpcode::G_OR:
489 case TargetOpcode::G_XOR: {
496 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
497 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
498 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
504 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
505 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
506 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
517 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
518 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
519 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
525 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
526 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
527 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
532 case TargetOpcode::G_LOAD:
533 case TargetOpcode::G_ZEXTLOAD:
534 case TargetOpcode::G_SEXTLOAD: {
536 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
545 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
546 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
554 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
555 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
568 case TargetOpcode::G_SELECT: {
572 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
573 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
574 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
580 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
581 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
582 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
588 case TargetOpcode::G_UADDE:
589 case TargetOpcode::G_USUBE:
590 case TargetOpcode::G_SADDE:
591 case TargetOpcode::G_SSUBE: {
595 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
596 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
597 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
598 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
599 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
605 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
606 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
607 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
608 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
613 case AMDGPU::G_BRCOND: {
614 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
619 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
625 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
630 case AMDGPU::G_INTRINSIC:
632 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
647 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
648 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
650 MRI->setRegBank(LoLHS, *Bank);
651 MRI->setRegBank(HiLHS, *Bank);
656 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
667 MRI.setType(Reg, NewTy);
687 LLT Ty =
MRI.getType(Src);
690 if (Bank == &AMDGPU::SGPRRegBank)
696 if (Bank != &AMDGPU::VGPRRegBank) {
698 Src =
B.buildCopy(Ty, Src).getReg(0);
699 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
703 unsigned NumParts = Bits / 32;
710 auto Unmerge =
B.buildUnmerge(S32, Src);
711 for (
unsigned i = 0; i < NumParts; ++i)
715 for (
unsigned i = 0; i < NumParts; ++i) {
717 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
718 MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
723 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
725 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
733 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
734 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
770 const unsigned MovExecOpc =
772 const unsigned MovExecTermOpc =
776 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
778 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
780 AMDGPU::EXEC_LO : AMDGPU::EXEC;
783 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
786 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
787 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
790 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
791 .addDef(InitSaveExecReg);
793 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
794 Register NewExec =
MRI.createVirtualRegister(WaveRC);
820 B.setInsertPt(*LoopBB, LoopBB->
end());
822 B.buildInstr(TargetOpcode::PHI)
824 .addReg(InitSaveExecReg)
839 auto NewEnd = BodyBB->
end();
846 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
850 if (!Op.isReg() || Op.isDef())
854 if (!SGPROperandRegs.
count(OldReg))
859 auto OldVal = WaterfalledRegMap.
find(OldReg);
860 if (OldVal != WaterfalledRegMap.
end()) {
861 Op.setReg(OldVal->second);
866 LLT OpTy =
MRI.getType(OpReg);
869 if (OpBank != &AMDGPU::VGPRRegBank) {
872 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
873 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
881 bool Is64 = OpSize % 64 == 0;
882 unsigned PartSize = Is64 ? 64 : 32;
884 unsigned NumParts = OpSize / PartSize;
890 CurrentLaneParts.
push_back(CurrentLaneReg);
892 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
893 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
894 for (
unsigned i = 0; i < NumParts; ++i) {
896 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
897 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
898 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
902 for (
unsigned i = 0; i < NumParts; ++i) {
904 OpParts[i]).getReg(0);
905 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
910 CondReg =
B.buildAnd(S1, CondReg, CmpReg).getReg(0);
911 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
915 Op.setReg(CurrentLaneReg);
918 WaterfalledRegMap.
insert(std::pair(OldReg, Op.getReg()));
923 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
928 MRI.setRegClass(CondReg, WaveRC);
931 B.buildInstr(AndSaveExecOpc)
935 MRI.setSimpleHint(NewExec, CondReg);
937 B.setInsertPt(*BodyBB, BodyBB->
end());
940 B.buildInstr(XorTermOpc)
949 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
956 B.setMBB(*RestoreExecBB);
957 B.buildInstr(MovExecTermOpc)
959 .addReg(SaveExecReg);
963 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
975 for (
unsigned Op : OpIndices) {
979 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
980 SGPROperandRegs.
insert(Reg);
984 return !SGPROperandRegs.
empty();
999 SGPROperandRegs,
MRI);
1012 Register Reg =
MI.getOperand(OpIdx).getReg();
1014 if (Bank == &AMDGPU::SGPRRegBank)
1020 MI.getOperand(OpIdx).setReg(Reg);
1032 assert(FirstSize % EltSize == 0);
1034 unsigned FirstPartNumElts = FirstSize / EltSize;
1035 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1054 const LLT LoadTy =
MRI.getType(DstReg);
1056 const unsigned MaxNonSmrdLoadSize = 128;
1060 if (DstBank == &AMDGPU::SGPRRegBank) {
1063 if (LoadSize != 32 && LoadSize != 96)
1067 const unsigned MemSize = 8 * MMO->
getSize();
1072 if (LoadSize == 32 &&
1078 ApplyRegBankMapping O(*
this,
MRI, &AMDGPU::SGPRRegBank);
1081 if (LoadSize == 32) {
1085 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1087 auto WideLoad =
B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
1088 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1089 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1091 auto WideLoad =
B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
1092 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1095 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1101 ApplyRegBankMapping ApplyBank(*
this,
MRI, DstBank);
1112 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1114 B.buildTrunc(
MI.getOperand(0), WideLoad);
1116 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1122 MI.eraseFromParent();
1127 if (LoadSize <= MaxNonSmrdLoadSize)
1133 if (SrcRegs.
empty())
1136 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1141 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1142 MRI.setType(BasePtrReg, PtrTy);
1144 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1145 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1146 ApplyRegBankMapping Observer(*
this,
MRI, &AMDGPU::VGPRRegBank);
1158 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1168 const auto &TFI = *ST.getFrameLowering();
1176 Register AllocSize =
MI.getOperand(1).getReg();
1182 if (SizeBank != &AMDGPU::SGPRRegBank)
1185 LLT PtrTy =
MRI.getType(Dst);
1190 ApplyRegBankMapping ApplyBank(*
this,
MRI, &AMDGPU::SGPRRegBank);
1193 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1194 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1196 auto SPCopy =
B.buildCopy(PtrTy, SPReg);
1197 if (Alignment > TFI.getStackAlign()) {
1198 auto PtrAdd =
B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
1199 B.buildMaskLowPtrBits(Dst, PtrAdd,
1200 Log2(Alignment) + ST.getWavefrontSizeLog2());
1202 B.buildPtrAdd(Dst, SPCopy, ScaledSize);
1205 MI.eraseFromParent();
1212 const int NumDefs =
MI.getNumExplicitDefs();
1216 RsrcIdx += NumDefs + 1;
1223 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1224 if (!
MI.getOperand(
I).isReg())
1228 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1240 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1244 if (std::optional<int64_t> Imm =
1248 VOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1249 SOffsetReg =
B.buildConstant(S32, SOffset).getReg(0);
1250 InstOffsetVal = ImmOffset;
1252 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1253 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1254 return SOffset + ImmOffset;
1269 SOffsetReg =
B.buildConstant(S32, SOffset).getReg(0);
1270 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1271 InstOffsetVal = ImmOffset;
1277 VOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1278 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1280 InstOffsetVal = ImmOffset;
1294 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1300 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1310 VOffsetReg = CombinedOffset;
1312 VOffsetReg =
B.buildCopy(S32, CombinedOffset).getReg(0);
1313 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1316 SOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1317 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1328 LLT Ty =
MRI.getType(Dst);
1334 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1335 OffsetBank == &AMDGPU::SGPRRegBank)
1343 if (LoadSize == 256 || LoadSize == 512) {
1344 NumLoads = LoadSize / 128;
1345 Ty = Ty.
divide(NumLoads);
1350 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1357 int64_t ImmOffset = 0;
1360 SOffset, ImmOffset, Alignment);
1365 const Align MemAlign(4);
1378 Register VIndex =
B.buildConstant(S32, 0).getReg(0);
1379 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1386 for (
int i = 0; i < NumLoads; ++i) {
1387 if (NumLoads == 1) {
1390 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1391 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1398 B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1399 .addDef(LoadParts[i])
1404 .addImm(ImmOffset + 16 * i)
1407 .addMemOperand(MMO);
1413 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1416 B.setInstr(*Span.
begin());
1417 MI.eraseFromParent();
1421 OpsToWaterfall.
insert(RSrc);
1423 OpsToWaterfall,
MRI);
1426 if (NumLoads != 1) {
1428 B.buildConcatVectors(Dst, LoadParts);
1430 B.buildMergeLikeInstr(Dst, LoadParts);
1434 if (RSrcBank == &AMDGPU::SGPRRegBank)
1435 MI.eraseFromParent();
1449 LLT Ty =
MRI.getType(DstReg);
1453 unsigned FirstOpnd =
MI.getOpcode() == AMDGPU::G_INTRINSIC ? 2 : 1;
1454 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1455 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1456 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1460 if (DstBank == &AMDGPU::VGPRRegBank) {
1466 ApplyRegBankMapping ApplyBank(*
this,
MRI, &AMDGPU::VGPRRegBank);
1471 auto ShiftOffset =
Signed ?
B.buildAShr(S64, SrcReg, OffsetReg)
1472 :
B.buildLShr(S64, SrcReg, OffsetReg);
1473 auto UnmergeSOffset =
B.buildUnmerge({S32, S32}, ShiftOffset);
1480 auto Zero =
B.buildConstant(S32, 0);
1481 auto WidthImm = ConstWidth->Value.getZExtValue();
1482 if (WidthImm <= 32) {
1486 Signed ?
B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1487 :
B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1489 Signed ?
B.buildAShr(S32, Extract,
B.buildConstant(S32, 31)) : Zero;
1490 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1494 auto UpperWidth =
B.buildConstant(S32, WidthImm - 32);
1497 ?
B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1498 :
B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1499 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1501 MI.eraseFromParent();
1507 auto ExtShift =
B.buildSub(S32,
B.buildConstant(S32, 64), WidthReg);
1508 auto SignBit =
B.buildShl(S64, ShiftOffset, ExtShift);
1510 B.buildAShr(S64, SignBit, ExtShift);
1512 B.buildLShr(S64, SignBit, ExtShift);
1513 MI.eraseFromParent();
1519 ApplyRegBankMapping ApplyBank(*
this,
MRI, &AMDGPU::SGPRRegBank);
1523 auto OffsetMask =
B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
1524 auto ClampOffset =
B.buildAnd(S32, OffsetReg, OffsetMask);
1527 auto ShiftWidth =
B.buildShl(S32, WidthReg,
B.buildConstant(S32, 16));
1532 auto MergedInputs =
B.buildOr(S32, ClampOffset, ShiftWidth);
1536 unsigned Opc = Ty == S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1537 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1539 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1543 MI.eraseFromParent();
1561 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1564 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1568 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1569 bool Accumulate =
true;
1580 Register DstLo =
B.buildMul(S32, Src0, Src1).getReg(0);
1581 bool MulHiInVgpr =
false;
1583 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1586 DstHi = IsUnsigned ?
B.buildUMulH(S32, Src0, Src1).getReg(0)
1587 :
B.buildSMulH(S32, Src0, Src1).getReg(0);
1588 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1590 Register VSrc0 =
B.buildCopy(S32, Src0).getReg(0);
1591 Register VSrc1 =
B.buildCopy(S32, Src1).getReg(0);
1593 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1594 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1596 DstHi = IsUnsigned ?
B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
1597 :
B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
1598 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1614 LLT CarryType = DstOnValu ? S1 : S32;
1616 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1618 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1623 Zero =
B.buildConstant(S32, 0).getReg(0);
1624 MRI.setRegBank(Zero,
1625 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1629 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1630 : AMDGPU::SGPRRegBank);
1632 if (DstOnValu && !MulHiInVgpr) {
1633 Carry =
B.buildTrunc(S1, Carry).getReg(0);
1634 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1640 DstLo =
B.buildCopy(S32, DstLo).getReg(0);
1641 DstHi =
B.buildCopy(S32, DstHi).getReg(0);
1642 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1643 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1646 auto Unmerge =
B.buildUnmerge(S32, Src2);
1647 Register Src2Lo = Unmerge.getReg(0);
1648 Register Src2Hi = Unmerge.getReg(1);
1649 MRI.setRegBank(Src2Lo, DstBank);
1650 MRI.setRegBank(Src2Hi, DstBank);
1654 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1656 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1657 MRI.setRegBank(Carry, CarryBank);
1660 auto AddLo =
B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
1661 DstLo = AddLo.getReg(0);
1662 Register CarryLo = AddLo.getReg(1);
1663 MRI.setRegBank(DstLo, DstBank);
1664 MRI.setRegBank(CarryLo, CarryBank);
1666 auto AddHi =
B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
1667 DstHi = AddHi.getReg(0);
1668 MRI.setRegBank(DstHi, DstBank);
1670 Register CarryHi = AddHi.getReg(1);
1671 MRI.setRegBank(CarryHi, CarryBank);
1676 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1677 MRI.setRegBank(Carry, CarryBank);
1681 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1682 MRI.setRegBank(Carry, CarryBank);
1686 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1689 B.buildCopy(Dst1, Carry);
1691 B.buildTrunc(Dst1, Carry);
1694 MI.eraseFromParent();
1701 case TargetOpcode::G_ASHR:
1702 case TargetOpcode::G_SMIN:
1703 case TargetOpcode::G_SMAX:
1704 return TargetOpcode::G_SEXT;
1705 case TargetOpcode::G_LSHR:
1706 case TargetOpcode::G_UMIN:
1707 case TargetOpcode::G_UMAX:
1708 return TargetOpcode::G_ZEXT;
1710 return TargetOpcode::G_ANYEXT;
1716static std::pair<Register, Register>
1719 auto Bitcast =
B.buildBitcast(S32, Src);
1721 if (ExtOpcode == TargetOpcode::G_SEXT) {
1722 auto ExtLo =
B.buildSExtInReg(S32, Bitcast, 16);
1723 auto ShiftHi =
B.buildAShr(S32, Bitcast,
B.buildConstant(S32, 16));
1724 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1727 auto ShiftHi =
B.buildLShr(S32, Bitcast,
B.buildConstant(S32, 16));
1728 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1729 auto ExtLo =
B.buildAnd(S32, Bitcast,
B.buildConstant(S32, 0xffff));
1730 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1733 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1734 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1742 if (!SrcReg.
empty()) {
1759 LLT StoreVT =
MRI.getType(Reg);
1763 auto Unmerge =
B.buildUnmerge(S16, Reg);
1767 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
1777static std::pair<Register, unsigned>
1781 return std::pair(
Register(), Const);
1785 return std::pair(
Base, Const);
1788 return std::pair(Reg, 0);
1791std::pair<Register, unsigned>
1804 if (ImmOffset != 0) {
1813 unsigned Overflow = ImmOffset & ~MaxImm;
1814 ImmOffset -= Overflow;
1815 if ((int32_t)Overflow < 0) {
1816 Overflow += ImmOffset;
1821 if (Overflow != 0) {
1823 BaseReg =
B.buildConstant(S32, Overflow).getReg(0);
1825 auto OverflowVal =
B.buildConstant(S32, Overflow);
1826 BaseReg =
B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
1832 BaseReg =
B.buildConstant(S32, 0).getReg(0);
1834 return {BaseReg, C1};
1840 LLT SrcTy =
MRI.getType(SrcReg);
1843 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1850 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1851 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1853 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1855 .addUse(SrcReg, 0, AMDGPU::sub0);
1856 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1858 .addUse(SrcReg, 0, AMDGPU::sub1);
1859 B.buildInstr(AMDGPU::REG_SEQUENCE)
1862 .addImm(AMDGPU::sub0)
1864 .addImm(AMDGPU::sub1);
1875 unsigned ConstOffset) {
1881 auto MaterializedOffset =
B.buildConstant(S32, ConstOffset);
1883 auto Add =
B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
1884 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1885 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1897 bool IsBooleanSrc =
false) {
1898 if (ExtOpc == AMDGPU::G_ZEXT) {
1899 B.buildConstant(Hi32Reg, 0);
1900 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1904 B.buildCopy(Hi32Reg, Lo32Reg);
1908 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1909 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1912 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1913 B.buildUndef(Hi32Reg);
1917bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1919 const OperandsMapper &OpdMapper)
const {
1925 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1927 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1929 LLT VecTy =
MRI.getType(VecReg);
1941 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1943 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1946 (DstBank == AMDGPU::SGPRRegBank &&
1947 SrcBank == AMDGPU::SGPRRegBank &&
1948 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1949 : AMDGPU::VCCRegBank;
1950 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 :
LLT::scalar(1);
1952 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1953 Idx =
B.buildCopy(S32,
Idx)->getOperand(0).getReg();
1954 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1959 unsigned NumLanes = DstRegs.size();
1963 EltTy =
MRI.getType(DstRegs[0]);
1965 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1967 for (
unsigned L = 0;
L < NumLanes; ++
L)
1968 Res[L] = UnmergeToEltTy.getReg(L);
1970 for (
unsigned I = 1;
I < NumElem; ++
I) {
1971 auto IC =
B.buildConstant(S32,
I);
1972 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
1974 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
1976 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1977 auto S =
B.buildSelect(EltTy, Cmp,
1978 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
1980 for (
unsigned N : { 0, 2, 3 })
1981 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
1983 Res[
L] = S->getOperand(0).getReg();
1987 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1988 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
1989 B.buildCopy(DstReg, Res[L]);
1990 MRI.setRegBank(DstReg, DstBank);
1993 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
1994 MI.eraseFromParent();
2005 if (CurrBank && *CurrBank != Bank) {
2006 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2007 MRI.setRegBank(Copy, Bank);
2011 MRI.setRegBank(Reg, Bank);
2015bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2017 const OperandsMapper &OpdMapper)
const {
2023 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2025 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2027 LLT VecTy =
MRI.getType(VecReg);
2039 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2041 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2043 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2046 (DstBank == AMDGPU::SGPRRegBank &&
2047 SrcBank == AMDGPU::SGPRRegBank &&
2048 InsBank == AMDGPU::SGPRRegBank &&
2049 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2050 : AMDGPU::VCCRegBank;
2051 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 :
LLT::scalar(1);
2053 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2054 Idx =
B.buildCopy(S32,
Idx)->getOperand(0).getReg();
2055 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2060 unsigned NumLanes = InsRegs.size();
2063 InsRegs.push_back(
MI.getOperand(2).getReg());
2065 EltTy =
MRI.getType(InsRegs[0]);
2068 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2071 for (
unsigned I = 0;
I < NumElem; ++
I) {
2072 auto IC =
B.buildConstant(S32,
I);
2073 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2075 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2077 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2079 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2090 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2091 B.buildBuildVector(
MI.getOperand(0), Ops);
2093 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2094 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2095 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2098 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2099 MI.eraseFromParent();
2107 unsigned Opc =
MI.getOpcode();
2110 case AMDGPU::G_CONSTANT:
2111 case AMDGPU::G_IMPLICIT_DEF: {
2113 LLT DstTy =
MRI.getType(DstReg);
2119 if (DstBank == &AMDGPU::VCCRegBank)
2122 if (DefRegs.
empty())
2126 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2129 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2131 MI.getOperand(0).setReg(NewDstReg);
2132 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2133 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2134 MI.getOperand(1).setCImm(
2138 MRI.setRegBank(NewDstReg, *DstBank);
2139 B.buildTrunc(DefRegs[0], NewDstReg);
2142 case AMDGPU::G_PHI: {
2144 LLT DstTy =
MRI.getType(DstReg);
2151 if (DstBank == &AMDGPU::VCCRegBank) {
2160 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
2164 if (SrcBank != &AMDGPU::VCCRegBank) {
2169 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2170 MI.getOperand(
I).setReg(Copy.getReg(0));
2182 ApplyRegBankMapping ApplyBank(*
this,
MRI, DstBank);
2191 case AMDGPU::G_ICMP:
2192 case AMDGPU::G_UADDO:
2193 case AMDGPU::G_USUBO:
2194 case AMDGPU::G_UADDE:
2195 case AMDGPU::G_SADDE:
2196 case AMDGPU::G_USUBE:
2197 case AMDGPU::G_SSUBE: {
2198 unsigned BoolDstOp = Opc == AMDGPU::G_ICMP ? 0 : 1;
2199 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2203 if (DstBank != &AMDGPU::SGPRRegBank)
2206 const bool HasCarryIn =
MI.getNumOperands() == 5;
2211 Register NewDstReg =
MRI.createGenericVirtualRegister(S32);
2212 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2213 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2217 Register NewSrcReg =
MRI.createGenericVirtualRegister(S32);
2218 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2219 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2220 MI.getOperand(4).setReg(NewSrcReg);
2224 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2229 if (DefRegs.
empty())
2231 B.buildTrunc(DefRegs[0], NewDstReg);
2234 case AMDGPU::G_SELECT: {
2236 LLT DstTy =
MRI.getType(DstReg);
2239 if (CondRegs.
empty())
2246 if (CondBank == &AMDGPU::SGPRRegBank) {
2249 Register NewCondReg =
MRI.createGenericVirtualRegister(S32);
2250 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2252 MI.getOperand(1).setReg(NewCondReg);
2253 B.buildZExt(NewCondReg, CondRegs[0]);
2267 if (DefRegs.
empty()) {
2272 if (Src1Regs.
empty())
2278 if (Src2Regs.
empty())
2285 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
2286 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
2288 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2289 MI.eraseFromParent();
2292 case AMDGPU::G_BRCOND: {
2293 Register CondReg =
MI.getOperand(0).getReg();
2298 if (CondBank == &AMDGPU::SGPRRegBank) {
2301 Register NewCondReg =
MRI.createGenericVirtualRegister(S32);
2302 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2304 MI.getOperand(0).setReg(NewCondReg);
2305 B.buildZExt(NewCondReg, CondReg);
2313 case AMDGPU::G_XOR: {
2317 LLT DstTy =
MRI.getType(DstReg);
2322 if (DstBank == &AMDGPU::VCCRegBank)
2326 ApplyRegBankMapping ApplyBank(*
this,
MRI, DstBank);
2345 if (DefRegs.
empty()) {
2352 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2359 if (Src0Regs.
empty())
2364 if (Src1Regs.
empty())
2371 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
2372 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
2374 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2375 MI.eraseFromParent();
2378 case AMDGPU::G_ABS: {
2384 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2386 ApplyRegBankMapping Apply(*
this,
MRI, &AMDGPU::VGPRRegBank);
2400 case AMDGPU::G_LSHR:
2401 case AMDGPU::G_ASHR:
2402 case AMDGPU::G_SMIN:
2403 case AMDGPU::G_SMAX:
2404 case AMDGPU::G_UMIN:
2405 case AMDGPU::G_UMAX: {
2407 LLT DstTy =
MRI.getType(DstReg);
2416 if (DstBank == &AMDGPU::VGPRRegBank)
2422 ApplyRegBankMapping ApplySALU(*
this,
MRI, &AMDGPU::SGPRRegBank);
2430 std::tie(WideSrc0Lo, WideSrc0Hi)
2432 std::tie(WideSrc1Lo, WideSrc1Hi)
2434 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2435 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2436 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2437 MI.eraseFromParent();
2445 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2446 Opc == AMDGPU::G_ASHR) {
2447 B.setInsertPt(*
MBB,
MI.getIterator());
2455 case AMDGPU::G_SEXT_INREG: {
2457 if (SrcRegs.
empty())
2462 ApplyRegBankMapping O(*
this,
MRI, &AMDGPU::VGPRRegBank);
2464 B.setChangeObserver(Observer);
2471 int Amt =
MI.getOperand(2).getImm();
2477 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2479 auto Freeze =
B.buildFreeze(S32, SrcRegs[0]);
2481 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2484 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(S32, 31));
2488 B.buildCopy(DstRegs[0], SrcRegs[0]);
2489 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2493 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2494 MI.eraseFromParent();
2497 case AMDGPU::G_CTPOP:
2498 case AMDGPU::G_BITREVERSE: {
2501 if (DstBank == &AMDGPU::SGPRRegBank)
2506 LLT Ty =
MRI.getType(SrcReg);
2510 ApplyRegBankMapping ApplyVALU(*
this,
MRI, &AMDGPU::VGPRRegBank);
2520 case AMDGPU::G_AMDGPU_FFBH_U32:
2521 case AMDGPU::G_AMDGPU_FFBL_B32:
2522 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2523 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2526 if (DstBank == &AMDGPU::SGPRRegBank)
2531 LLT Ty =
MRI.getType(SrcReg);
2541 ApplyRegBankMapping ApplyVALU(*
this,
MRI, &AMDGPU::VGPRRegBank);
2544 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2545 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2546 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2547 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2549 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2550 auto X =
B.buildInstr(NewOpc, {S32}, {SrcRegs[
Idx]});
2551 auto Y =
B.buildInstr(NewOpc, {S32}, {SrcRegs[
Idx ^ 1]});
2553 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2555 : AMDGPU::G_UADDSAT;
2556 Y =
B.buildInstr(AddOpc, {S32}, {
Y,
B.buildConstant(S32, 32)});
2558 B.buildUMin(DstReg,
X,
Y);
2559 MI.eraseFromParent();
2562 case AMDGPU::G_SEXT:
2563 case AMDGPU::G_ZEXT:
2564 case AMDGPU::G_ANYEXT: {
2566 LLT SrcTy =
MRI.getType(SrcReg);
2567 const bool Signed = Opc == AMDGPU::G_SEXT;
2576 LLT DstTy =
MRI.getType(DstReg);
2578 SrcBank != &AMDGPU::SGPRRegBank &&
2579 SrcBank != &AMDGPU::VCCRegBank &&
2589 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2590 }
else if (Opc == AMDGPU::G_ZEXT) {
2591 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2593 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2597 MRI.setRegBank(DstReg, *SrcBank);
2598 MI.eraseFromParent();
2608 if (SrcBank == &AMDGPU::VCCRegBank) {
2615 const bool UseSel64 = DstSize > 32 &&
2616 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2620 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2621 auto False =
B.buildConstant(SelType, 0);
2623 MRI.setRegBank(True.getReg(0), *DstBank);
2624 MRI.setRegBank(False.getReg(0), *DstBank);
2625 MRI.setRegBank(DstReg, *DstBank);
2628 B.buildSelect(DefRegs[0], SrcReg, True, False);
2630 }
else if (DstSize < 32) {
2631 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2632 MRI.setRegBank(Sel.getReg(0), *DstBank);
2633 B.buildTrunc(DstReg, Sel);
2635 B.buildSelect(DstReg, SrcReg, True, False);
2638 MI.eraseFromParent();
2644 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2653 LLT DstTy =
MRI.getType(DstReg);
2654 LLT SrcTy =
MRI.getType(SrcReg);
2656 if (foldExtractEltToCmpSelect(
MI,
MRI, OpdMapper))
2670 unsigned ConstOffset;
2671 std::tie(BaseIdxReg, ConstOffset) =
2678 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2683 if (ShouldMoveIndexIntoLoop)
2684 MI.getOperand(2).setReg(BaseIdxReg);
2690 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2691 SrcBank == &AMDGPU::SGPRRegBank;
2692 if (DstRegs.
empty()) {
2697 if (NeedCopyToVGPR) {
2699 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2700 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2701 MI.getOperand(0).setReg(TmpReg);
2702 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2709 if (ShouldMoveIndexIntoLoop)
2719 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2720 auto One =
B.buildConstant(S32, 1);
2731 auto IdxLo =
B.buildShl(S32, BaseIdxReg, One);
2732 auto IdxHi =
B.buildAdd(S32, IdxLo, One);
2734 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2735 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2737 MRI.setRegBank(DstReg, *DstBank);
2738 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2739 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2740 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2741 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2745 MI.eraseFromParent();
2751 B.setInstr(*Span.
begin());
2752 MI.eraseFromParent();
2754 OpsToWaterfall,
MRI);
2756 if (NeedCopyToVGPR) {
2758 Register TmpReg0 =
MRI.createGenericVirtualRegister(S32);
2759 Register TmpReg1 =
MRI.createGenericVirtualRegister(S32);
2760 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2761 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2763 Extract0->getOperand(0).setReg(TmpReg0);
2764 Extract1->getOperand(0).setReg(TmpReg1);
2772 if (ShouldMoveIndexIntoLoop)
2777 case AMDGPU::G_INSERT_VECTOR_ELT: {
2781 LLT VecTy =
MRI.getType(DstReg);
2787 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2789 if (foldInsertEltToCmpSelect(
MI,
MRI, OpdMapper))
2797 LLT InsTy =
MRI.getType(InsReg);
2801 unsigned ConstOffset;
2802 std::tie(BaseIdxReg, ConstOffset) =
2809 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2814 if (ShouldMoveIndexIntoLoop)
2815 MI.getOperand(3).setReg(BaseIdxReg);
2818 if (InsRegs.
empty()) {
2822 if (ShouldMoveIndexIntoLoop) {
2837 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2838 auto One =
B.buildConstant(S32, 1);
2847 auto IdxLo =
B.buildShl(S32, BaseIdxReg, One);
2848 auto IdxHi =
B.buildAdd(S32, IdxLo, One);
2850 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
2851 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
2860 MRI.setRegBank(InsReg, *InsSrcBank);
2861 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2862 MRI.setRegBank(InsLo.getReg(0), *DstBank);
2863 MRI.setRegBank(InsHi.getReg(0), *DstBank);
2864 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2865 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2866 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2871 B.setInsertPt(
B.getMBB(),
MI);
2872 B.buildBitcast(DstReg, InsHi);
2873 MI.eraseFromParent();
2877 B.setInstr(*Span.
begin());
2878 MI.eraseFromParent();
2883 OpsToWaterfall,
MRI);
2889 B.buildBitcast(DstReg, InsHi);
2892 if (ShouldMoveIndexIntoLoop)
2897 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
2898 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
2899 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
2900 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
2901 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
2902 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
2903 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
2904 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
2905 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
2906 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
2907 case AMDGPU::G_AMDGPU_BUFFER_STORE:
2908 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
2909 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
2910 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
2911 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
2912 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
2913 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
2918 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
2919 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
2920 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
2921 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
2922 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
2923 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
2924 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
2925 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
2926 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
2927 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
2928 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
2929 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
2934 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
2935 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
2936 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
2941 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
2946 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
2950 case AMDGPU::G_INTRINSIC: {
2951 switch (
MI.getIntrinsicID()) {
2952 case Intrinsic::amdgcn_readlane: {
2963 case Intrinsic::amdgcn_writelane: {
2973 case Intrinsic::amdgcn_interp_p1:
2974 case Intrinsic::amdgcn_interp_p2:
2975 case Intrinsic::amdgcn_interp_mov:
2976 case Intrinsic::amdgcn_interp_p1_f16:
2977 case Intrinsic::amdgcn_interp_p2_f16:
2978 case Intrinsic::amdgcn_lds_param_load: {
2986 case Intrinsic::amdgcn_interp_inreg_p10:
2987 case Intrinsic::amdgcn_interp_inreg_p2:
2988 case Intrinsic::amdgcn_interp_inreg_p10_f16:
2989 case Intrinsic::amdgcn_interp_inreg_p2_f16:
2992 case Intrinsic::amdgcn_permlane16:
2993 case Intrinsic::amdgcn_permlanex16: {
3001 case Intrinsic::amdgcn_sbfe:
3004 case Intrinsic::amdgcn_ubfe:
3007 case Intrinsic::amdgcn_ballot:
3013 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3014 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3015 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3016 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3026 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3027 unsigned N =
MI.getNumExplicitOperands() - 2;
3032 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
3033 auto IntrID =
MI.getIntrinsicID();
3035 case Intrinsic::amdgcn_ds_ordered_add:
3036 case Intrinsic::amdgcn_ds_ordered_swap: {
3043 case Intrinsic::amdgcn_ds_gws_init:
3044 case Intrinsic::amdgcn_ds_gws_barrier:
3045 case Intrinsic::amdgcn_ds_gws_sema_br: {
3051 case Intrinsic::amdgcn_ds_gws_sema_v:
3052 case Intrinsic::amdgcn_ds_gws_sema_p:
3053 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3058 case Intrinsic::amdgcn_ds_append:
3059 case Intrinsic::amdgcn_ds_consume: {
3063 case Intrinsic::amdgcn_s_sendmsg:
3064 case Intrinsic::amdgcn_s_sendmsghalt: {
3069 case Intrinsic::amdgcn_s_setreg: {
3073 case Intrinsic::amdgcn_raw_buffer_load_lds: {
3080 case Intrinsic::amdgcn_struct_buffer_load_lds: {
3087 case Intrinsic::amdgcn_global_load_lds: {
3092 case Intrinsic::amdgcn_lds_direct_load: {
3098 case Intrinsic::amdgcn_exp_row:
3108 if (RSrcIntrin->IsImage) {
3119 case AMDGPU::G_SI_CALL: {
3130 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3131 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3137 unsigned NonCopyInstrsLen = 0;
3143 while (Start->getOpcode() != FrameSetupOpcode) {
3145 bool IsCopy =
false;
3146 if (Start->getOpcode() == AMDGPU::COPY) {
3147 auto &Dst = Start->getOperand(0);
3150 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3155 auto &Src = Start->getOperand(1);
3158 IsCopy =
Info->getScratchRSrcReg() == Reg;
3166 NonCopyInstrsLen = NonCopyInstrs.
size();
3171 NonCopyInstrs.
resize(NonCopyInstrsLen);
3173 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3179 NonCopyInstrs.
clear();
3180 NonCopyInstrsLen = 0;
3183 while (End->getOpcode() != FrameDestroyOpcode) {
3185 bool IsCopy =
false;
3186 if (End->getOpcode() == AMDGPU::COPY) {
3187 auto &Src = End->getOperand(1);
3190 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3196 NonCopyInstrsLen = NonCopyInstrs.
size();
3201 NonCopyInstrs.
resize(NonCopyInstrsLen);
3205 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3214 case AMDGPU::G_LOAD:
3215 case AMDGPU::G_ZEXTLOAD:
3216 case AMDGPU::G_SEXTLOAD: {
3221 case AMDGPU::G_DYN_STACKALLOC:
3224 case AMDGPU::G_SBFX:
3227 case AMDGPU::G_UBFX:
3230 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3231 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3246 if (RB0 == AMDGPU::InvalidRegBankID)
3248 if (RB1 == AMDGPU::InvalidRegBankID)
3251 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3252 return AMDGPU::SGPRRegBankID;
3254 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3255 return AMDGPU::AGPRRegBankID;
3257 return AMDGPU::VGPRRegBankID;
3261 if (RB0 == AMDGPU::InvalidRegBankID)
3263 if (RB1 == AMDGPU::InvalidRegBankID)
3269 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3270 return AMDGPU::VCCRegBankID;
3278 unsigned RegBank = AMDGPU::InvalidRegBankID;
3286 if (RegBank == AMDGPU::VGPRRegBankID)
3302 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3315 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3321 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3324 MI.getNumOperands());
3337 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3343 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3344 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3348 MI.getNumOperands());
3357 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I) {
3363 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3367 MI.getNumOperands());
3373 int RsrcIdx)
const {
3376 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3378 const int NumOps =
MI.getNumOperands();
3383 for (
int I = 0;
I != NumOps; ++
I) {
3384 if (!
MI.getOperand(
I).isReg())
3398 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3403 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3406 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3417 LLT PtrTy =
MRI.getType(PtrReg);
3421 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3426 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3437 LLT PtrTy =
MRI.getType(PtrReg);
3449 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3450 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3452 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3457 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3459 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3462 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3463 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3466 OpdsMapping[0] = ValMapping;
3467 OpdsMapping[1] = PtrMapping;
3492 return AMDGPU::getValueMapping(Bank,
Size);
3500 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3508 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3525 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3532 assert(SrcBank &&
"src bank should have been assigned already");
3537 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3542 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3544 OpdsMapping[0] = &ValMap;
3545 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3546 OpdsMapping[1] = &ValMap;
3553 if (
MI.isRegSequence()) {
3556 unsigned BankID = AMDGPU::SGPRRegBankID;
3558 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3562 if (OpBank != AMDGPU::SGPRRegBankID) {
3563 BankID = AMDGPU::VGPRRegBankID;
3579 if (
MI.getOpcode() == TargetOpcode::G_PHI) {
3580 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3585 ResultBank = DstBank->
getID();
3587 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3592 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3593 ResultBank = AMDGPU::VGPRRegBankID;
3598 unsigned OpBank = Bank->
getID();
3602 assert(ResultBank != AMDGPU::InvalidRegBankID);
3604 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3619 switch (
MI.getOpcode()) {
3625 case AMDGPU::G_XOR: {
3626 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3631 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3632 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3633 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3635 TargetBankID = DstBank->
getID();
3636 if (DstBank == &AMDGPU::VCCRegBank) {
3637 TargetBankID = AMDGPU::VCCRegBankID;
3638 BankLHS = AMDGPU::VCCRegBankID;
3639 BankRHS = AMDGPU::VCCRegBankID;
3642 AMDGPU::SGPRRegBankID);
3644 AMDGPU::SGPRRegBankID);
3648 AMDGPU::VCCRegBankID);
3650 AMDGPU::VCCRegBankID);
3653 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3654 TargetBankID = AMDGPU::VGPRRegBankID;
3655 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3656 TargetBankID = AMDGPU::VCCRegBankID;
3657 BankLHS = AMDGPU::VCCRegBankID;
3658 BankRHS = AMDGPU::VCCRegBankID;
3659 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3660 TargetBankID = AMDGPU::SGPRRegBankID;
3664 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3665 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3666 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3673 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3674 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3676 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3678 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3681 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3689 case AMDGPU::G_PTR_ADD:
3690 case AMDGPU::G_PTRMASK:
3695 case AMDGPU::G_LSHR:
3696 case AMDGPU::G_ASHR:
3697 case AMDGPU::G_UADDO:
3698 case AMDGPU::G_USUBO:
3699 case AMDGPU::G_UADDE:
3700 case AMDGPU::G_SADDE:
3701 case AMDGPU::G_USUBE:
3702 case AMDGPU::G_SSUBE:
3703 case AMDGPU::G_SMIN:
3704 case AMDGPU::G_SMAX:
3705 case AMDGPU::G_UMIN:
3706 case AMDGPU::G_UMAX:
3708 case AMDGPU::G_SHUFFLE_VECTOR:
3709 case AMDGPU::G_SBFX:
3710 case AMDGPU::G_UBFX:
3715 case AMDGPU::G_SADDSAT:
3716 case AMDGPU::G_SSUBSAT:
3717 case AMDGPU::G_UADDSAT:
3718 case AMDGPU::G_USUBSAT:
3719 case AMDGPU::G_FADD:
3720 case AMDGPU::G_FSUB:
3721 case AMDGPU::G_FPTOSI:
3722 case AMDGPU::G_FPTOUI:
3723 case AMDGPU::G_FMUL:
3725 case AMDGPU::G_FMAD:
3726 case AMDGPU::G_FSQRT:
3727 case AMDGPU::G_FFLOOR:
3728 case AMDGPU::G_FCEIL:
3729 case AMDGPU::G_FRINT:
3730 case AMDGPU::G_SITOFP:
3731 case AMDGPU::G_UITOFP:
3732 case AMDGPU::G_FPTRUNC:
3733 case AMDGPU::G_FPEXT:
3734 case AMDGPU::G_FEXP2:
3735 case AMDGPU::G_FLOG2:
3736 case AMDGPU::G_FMINNUM:
3737 case AMDGPU::G_FMAXNUM:
3738 case AMDGPU::G_FMINNUM_IEEE:
3739 case AMDGPU::G_FMAXNUM_IEEE:
3740 case AMDGPU::G_FCANONICALIZE:
3741 case AMDGPU::G_INTRINSIC_TRUNC:
3742 case AMDGPU::G_STRICT_FADD:
3743 case AMDGPU::G_STRICT_FSUB:
3744 case AMDGPU::G_STRICT_FMUL:
3745 case AMDGPU::G_STRICT_FMA:
3746 case AMDGPU::G_BSWAP:
3747 case AMDGPU::G_FSHR:
3748 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
3749 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
3750 case AMDGPU::G_AMDGPU_RCP_IFLAG:
3751 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
3752 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
3753 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
3754 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
3755 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
3756 case AMDGPU::G_AMDGPU_SMED3:
3758 case AMDGPU::G_UMULH:
3759 case AMDGPU::G_SMULH: {
3764 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3765 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
3774 bool AllSalu =
true;
3775 bool MulSalu =
true;
3776 for (
unsigned i = 0; i < 5; ++i) {
3779 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
3781 if (i == 2 || i == 3) {
3799 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
3800 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
3801 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3802 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3803 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
3806 case AMDGPU::G_IMPLICIT_DEF: {
3807 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3808 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3811 case AMDGPU::G_FCONSTANT:
3812 case AMDGPU::G_CONSTANT:
3813 case AMDGPU::G_GLOBAL_VALUE:
3814 case AMDGPU::G_BLOCK_ADDR:
3815 case AMDGPU::G_READCYCLECOUNTER: {
3816 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3817 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3820 case AMDGPU::G_FRAME_INDEX: {
3823 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3824 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3827 case AMDGPU::G_DYN_STACKALLOC: {
3829 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3831 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
3834 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
3840 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
3841 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3844 case AMDGPU::G_INSERT: {
3849 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
3850 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
3851 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
3852 OpdsMapping[3] =
nullptr;
3855 case AMDGPU::G_EXTRACT: {
3859 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
3860 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
3861 OpdsMapping[2] =
nullptr;
3864 case AMDGPU::G_BUILD_VECTOR:
3865 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
3866 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
3869 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3872 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
3874 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
3875 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
3876 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
3882 case AMDGPU::G_MERGE_VALUES:
3883 case AMDGPU::G_CONCAT_VECTORS: {
3885 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3886 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3888 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
3890 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
3891 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
3894 case AMDGPU::G_BITREVERSE:
3895 case AMDGPU::G_BITCAST:
3896 case AMDGPU::G_INTTOPTR:
3897 case AMDGPU::G_PTRTOINT:
3898 case AMDGPU::G_FABS:
3899 case AMDGPU::G_FNEG: {
3900 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3902 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
3905 case AMDGPU::G_AMDGPU_FFBH_U32:
3906 case AMDGPU::G_AMDGPU_FFBL_B32:
3907 case AMDGPU::G_CTLZ_ZERO_UNDEF:
3908 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
3909 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3911 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
3912 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
3915 case AMDGPU::G_CTPOP: {
3916 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3918 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
3923 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
3926 case AMDGPU::G_TRUNC: {
3932 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
3933 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
3936 case AMDGPU::G_ZEXT:
3937 case AMDGPU::G_SEXT:
3938 case AMDGPU::G_ANYEXT:
3939 case AMDGPU::G_SEXT_INREG: {
3948 switch (SrcBank->
getID()) {
3949 case AMDGPU::SGPRRegBankID:
3950 DstBank = AMDGPU::SGPRRegBankID;
3953 DstBank = AMDGPU::VGPRRegBankID;
3959 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
3960 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
3964 case AMDGPU::G_FCMP: {
3965 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
3966 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
3967 OpdsMapping[1] =
nullptr;
3968 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3969 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3972 case AMDGPU::G_IS_FPCLASS: {
3974 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
3975 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3976 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
3977 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
3980 case AMDGPU::G_STORE: {
3982 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3987 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3988 OpdsMapping[0] = ValMapping;
3992 case AMDGPU::G_ICMP: {
3999 AMDGPU::SGPRRegBankID);
4003 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4004 Op2Bank == AMDGPU::SGPRRegBankID &&
4005 Op3Bank == AMDGPU::SGPRRegBankID &&
4010 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4011 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4015 const unsigned ResultSize = 1;
4017 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4018 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4019 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4022 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4025 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4026 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4027 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4029 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4031 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4032 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4035 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4038 case AMDGPU::G_INSERT_VECTOR_ELT: {
4040 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4042 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4043 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4044 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4048 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4049 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4053 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4054 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4057 assert(InsertSize == 32 || InsertSize == 64);
4058 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4062 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4065 case AMDGPU::G_UNMERGE_VALUES: {
4070 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4072 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4076 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4077 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4078 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4079 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4080 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4081 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4082 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4083 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4084 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4085 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4086 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4087 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4088 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4089 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4090 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4091 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4092 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4111 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4112 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4113 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4114 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4115 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4116 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4117 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4118 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4119 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4120 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4121 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4122 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4123 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4124 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4125 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4148 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4174 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
4182 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4183 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4184 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4186 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4187 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4190 case AMDGPU::G_INTRINSIC: {
4191 switch (
MI.getIntrinsicID()) {
4194 case Intrinsic::amdgcn_div_fmas:
4195 case Intrinsic::amdgcn_div_fixup:
4196 case Intrinsic::amdgcn_trig_preop:
4197 case Intrinsic::amdgcn_sin:
4198 case Intrinsic::amdgcn_cos:
4199 case Intrinsic::amdgcn_log_clamp:
4200 case Intrinsic::amdgcn_rcp:
4201 case Intrinsic::amdgcn_rcp_legacy:
4202 case Intrinsic::amdgcn_sqrt:
4203 case Intrinsic::amdgcn_rsq:
4204 case Intrinsic::amdgcn_rsq_legacy:
4205 case Intrinsic::amdgcn_rsq_clamp:
4206 case Intrinsic::amdgcn_fmul_legacy:
4207 case Intrinsic::amdgcn_fma_legacy:
4208 case Intrinsic::amdgcn_ldexp:
4209 case Intrinsic::amdgcn_frexp_mant:
4210 case Intrinsic::amdgcn_frexp_exp:
4211 case Intrinsic::amdgcn_fract:
4212 case Intrinsic::amdgcn_cvt_pkrtz:
4213 case Intrinsic::amdgcn_cvt_pknorm_i16:
4214 case Intrinsic::amdgcn_cvt_pknorm_u16:
4215 case Intrinsic::amdgcn_cvt_pk_i16:
4216 case Intrinsic::amdgcn_cvt_pk_u16:
4217 case Intrinsic::amdgcn_fmed3:
4218 case Intrinsic::amdgcn_cubeid:
4219 case Intrinsic::amdgcn_cubema:
4220 case Intrinsic::amdgcn_cubesc:
4221 case Intrinsic::amdgcn_cubetc:
4222 case Intrinsic::amdgcn_sffbh:
4223 case Intrinsic::amdgcn_fmad_ftz:
4224 case Intrinsic::amdgcn_mbcnt_lo:
4225 case Intrinsic::amdgcn_mbcnt_hi:
4226 case Intrinsic::amdgcn_mul_u24:
4227 case Intrinsic::amdgcn_mul_i24:
4228 case Intrinsic::amdgcn_mulhi_u24:
4229 case Intrinsic::amdgcn_mulhi_i24:
4230 case Intrinsic::amdgcn_lerp:
4231 case Intrinsic::amdgcn_sad_u8:
4232 case Intrinsic::amdgcn_msad_u8:
4233 case Intrinsic::amdgcn_sad_hi_u8:
4234 case Intrinsic::amdgcn_sad_u16:
4235 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4236 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4237 case Intrinsic::amdgcn_mqsad_u32_u8:
4238 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4239 case Intrinsic::amdgcn_alignbyte:
4240 case Intrinsic::amdgcn_perm:
4241 case Intrinsic::amdgcn_fdot2:
4242 case Intrinsic::amdgcn_sdot2:
4243 case Intrinsic::amdgcn_udot2:
4244 case Intrinsic::amdgcn_sdot4:
4245 case Intrinsic::amdgcn_udot4:
4246 case Intrinsic::amdgcn_sdot8:
4247 case Intrinsic::amdgcn_udot8:
4248 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4249 case Intrinsic::amdgcn_fdot2_f16_f16:
4250 case Intrinsic::amdgcn_fdot2_f32_bf16:
4251 case Intrinsic::amdgcn_sudot4:
4252 case Intrinsic::amdgcn_sudot8:
4253 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4254 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4255 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4256 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4257 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4258 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4260 case Intrinsic::amdgcn_sbfe:
4261 case Intrinsic::amdgcn_ubfe:
4265 case Intrinsic::amdgcn_ds_swizzle:
4266 case Intrinsic::amdgcn_ds_permute:
4267 case Intrinsic::amdgcn_ds_bpermute:
4268 case Intrinsic::amdgcn_update_dpp:
4269 case Intrinsic::amdgcn_mov_dpp8:
4270 case Intrinsic::amdgcn_mov_dpp:
4271 case Intrinsic::amdgcn_strict_wwm:
4272 case Intrinsic::amdgcn_wwm:
4273 case Intrinsic::amdgcn_strict_wqm:
4274 case Intrinsic::amdgcn_wqm:
4275 case Intrinsic::amdgcn_softwqm:
4276 case Intrinsic::amdgcn_set_inactive:
4277 case Intrinsic::amdgcn_permlane64:
4279 case Intrinsic::amdgcn_kernarg_segment_ptr:
4280 case Intrinsic::amdgcn_s_getpc:
4281 case Intrinsic::amdgcn_groupstaticsize:
4282 case Intrinsic::amdgcn_reloc_constant:
4283 case Intrinsic::returnaddress: {
4284 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4285 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4288 case Intrinsic::amdgcn_wqm_vote: {
4289 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4290 OpdsMapping[0] = OpdsMapping[2]
4291 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4294 case Intrinsic::amdgcn_ps_live: {
4295 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4298 case Intrinsic::amdgcn_div_scale: {
4299 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4300 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4301 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4302 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4304 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4305 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4306 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4309 case Intrinsic::amdgcn_class: {
4310 Register Src0Reg =
MI.getOperand(2).getReg();
4311 Register Src1Reg =
MI.getOperand(3).getReg();
4312 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4313 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4314 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4315 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4316 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4317 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4320 case Intrinsic::amdgcn_icmp:
4321 case Intrinsic::amdgcn_fcmp: {
4322 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4324 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4325 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4326 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4327 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4330 case Intrinsic::amdgcn_readlane: {
4333 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4335 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4338 case Intrinsic::amdgcn_readfirstlane: {
4339 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4340 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4341 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4342 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4345 case Intrinsic::amdgcn_writelane: {
4346 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4348 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4351 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4353 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4357 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4358 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4359 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4362 case Intrinsic::amdgcn_if_break: {
4364 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4365 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4366 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4369 case Intrinsic::amdgcn_permlane16:
4370 case Intrinsic::amdgcn_permlanex16: {
4372 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4373 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4374 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4379 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4380 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4381 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4382 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4383 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4384 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4385 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4386 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4387 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4388 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4389 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4390 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4391 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4392 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4393 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4394 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4395 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4396 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4397 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4398 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4399 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4400 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4401 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4402 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4403 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4404 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4405 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4406 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4407 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4408 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4409 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4410 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4411 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4412 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4413 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4414 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4415 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4416 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4417 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
4426 Info->mayNeedAGPRs()
4432 Info->mayNeedAGPRs()
4437 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4438 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4439 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4440 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4441 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4442 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4443 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4444 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4445 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4446 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4447 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4448 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4449 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4450 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
4459 case Intrinsic::amdgcn_interp_p1:
4460 case Intrinsic::amdgcn_interp_p2:
4461 case Intrinsic::amdgcn_interp_mov:
4462 case Intrinsic::amdgcn_interp_p1_f16:
4463 case Intrinsic::amdgcn_interp_p2_f16:
4464 case Intrinsic::amdgcn_lds_param_load: {
4465 const int M0Idx =
MI.getNumOperands() - 1;
4466 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4468 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4470 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4471 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4472 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4476 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4479 case Intrinsic::amdgcn_interp_inreg_p10:
4480 case Intrinsic::amdgcn_interp_inreg_p2:
4481 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4482 case Intrinsic::amdgcn_interp_inreg_p2_f16: {
4483 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4484 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4485 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4486 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4487 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4490 case Intrinsic::amdgcn_ballot: {
4491 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4492 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4493 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4494 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4500 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4501 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4502 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4503 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4504 auto IntrID =
MI.getIntrinsicID();
4506 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
4513 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
4514 unsigned N =
MI.getNumExplicitOperands() - 2;
4515 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
4519 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4522 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4525 for (
unsigned I = 2;
I <
N; ++
I) {
4526 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
4527 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4532 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
4533 auto IntrID =
MI.getIntrinsicID();
4535 case Intrinsic::amdgcn_s_getreg:
4536 case Intrinsic::amdgcn_s_memtime:
4537 case Intrinsic::amdgcn_s_memrealtime:
4538 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
4539 case Intrinsic::amdgcn_s_sendmsg_rtn: {
4540 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4541 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4544 case Intrinsic::amdgcn_global_atomic_fadd:
4545 case Intrinsic::amdgcn_global_atomic_csub:
4546 case Intrinsic::amdgcn_global_atomic_fmin:
4547 case Intrinsic::amdgcn_global_atomic_fmax:
4548 case Intrinsic::amdgcn_flat_atomic_fadd:
4549 case Intrinsic::amdgcn_flat_atomic_fmin:
4550 case Intrinsic::amdgcn_flat_atomic_fmax:
4551 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
4552 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4554 case Intrinsic::amdgcn_ds_ordered_add:
4555 case Intrinsic::amdgcn_ds_ordered_swap:
4556 case Intrinsic::amdgcn_ds_fadd_v2bf16: {
4557 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4558 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4560 AMDGPU::SGPRRegBankID);
4561 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
4562 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4565 case Intrinsic::amdgcn_ds_append:
4566 case Intrinsic::amdgcn_ds_consume: {
4567 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4568 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4572 case Intrinsic::amdgcn_exp_compr:
4573 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4574 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4576 case Intrinsic::amdgcn_exp:
4578 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4579 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4580 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4581 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4583 case Intrinsic::amdgcn_exp_row:
4584 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4585 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4586 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4587 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4590 case Intrinsic::amdgcn_s_sendmsg:
4591 case Intrinsic::amdgcn_s_sendmsghalt: {
4594 AMDGPU::SGPRRegBankID);
4595 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4598 case Intrinsic::amdgcn_s_setreg: {
4601 AMDGPU::SGPRRegBankID);
4602 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4605 case Intrinsic::amdgcn_end_cf: {
4607 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4610 case Intrinsic::amdgcn_else: {
4612 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4613 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4614 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4617 case Intrinsic::amdgcn_live_mask: {
4618 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4621 case Intrinsic::amdgcn_wqm_demote:
4622 case Intrinsic::amdgcn_kill: {
4623 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4626 case Intrinsic::amdgcn_raw_buffer_load:
4627 case Intrinsic::amdgcn_raw_tbuffer_load: {
4636 case Intrinsic::amdgcn_raw_buffer_load_lds: {
4643 case Intrinsic::amdgcn_raw_buffer_store:
4644 case Intrinsic::amdgcn_raw_buffer_store_format:
4645 case Intrinsic::amdgcn_raw_tbuffer_store: {
4652 case Intrinsic::amdgcn_struct_buffer_load:
4653 case Intrinsic::amdgcn_struct_tbuffer_load: {
4661 case Intrinsic::amdgcn_struct_buffer_load_lds: {
4669 case Intrinsic::amdgcn_struct_buffer_store:
4670 case Intrinsic::amdgcn_struct_tbuffer_store: {
4678 case Intrinsic::amdgcn_init_exec_from_input: {
4680 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4683 case Intrinsic::amdgcn_ds_gws_init:
4684 case Intrinsic::amdgcn_ds_gws_barrier:
4685 case Intrinsic::amdgcn_ds_gws_sema_br: {
4686 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4690 AMDGPU::SGPRRegBankID);
4691 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4694 case Intrinsic::amdgcn_ds_gws_sema_v:
4695 case Intrinsic::amdgcn_ds_gws_sema_p:
4696 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
4699 AMDGPU::SGPRRegBankID);
4700 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4703 case Intrinsic::amdgcn_global_load_lds: {
4708 case Intrinsic::amdgcn_lds_direct_load: {
4709 const int M0Idx =
MI.getNumOperands() - 1;
4710 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4712 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4714 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4715 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4716 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4720 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4723 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
4724 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
4728 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
4747 case AMDGPU::G_SELECT: {
4748 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4750 AMDGPU::SGPRRegBankID);
4752 AMDGPU::SGPRRegBankID);
4753 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
4754 Op3Bank == AMDGPU::SGPRRegBankID;
4756 unsigned CondBankDefault = SGPRSrcs ?
4757 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4760 if (CondBank == AMDGPU::SGPRRegBankID)
4761 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4762 else if (CondBank == AMDGPU::VGPRRegBankID)
4763 CondBank = AMDGPU::VCCRegBankID;
4765 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
4766 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4768 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
4772 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
4773 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
4774 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
4775 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
4777 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
4778 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
4779 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
4780 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
4786 case AMDGPU::G_SI_CALL: {
4787 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);