84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
114 ~ApplyRegBankMapping() {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
232 unsigned Size)
const {
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0,
E =
MI.getNumExplicitDefs();
I !=
E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
334 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I],
Sizes[
I]);
349 case Intrinsic::amdgcn_readlane: {
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
473 switch (
MI.getOpcode()) {
474 case TargetOpcode::G_CONSTANT:
475 case TargetOpcode::G_IMPLICIT_DEF: {
479 { { AMDGPU::VGPRRegBankID }, 1 },
480 { { AMDGPU::SGPRRegBankID }, 1 },
481 { { AMDGPU::VCCRegBankID }, 1 }
484 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
489 case TargetOpcode::G_FCONSTANT:
490 case TargetOpcode::G_FRAME_INDEX:
491 case TargetOpcode::G_GLOBAL_VALUE: {
493 { { AMDGPU::VGPRRegBankID }, 1 },
494 { { AMDGPU::SGPRRegBankID }, 1 }
497 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
499 case TargetOpcode::G_AND:
500 case TargetOpcode::G_OR:
501 case TargetOpcode::G_XOR: {
508 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
509 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
510 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
516 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
517 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
518 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
529 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
530 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
531 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
537 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
538 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
539 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
544 case TargetOpcode::G_LOAD:
545 case TargetOpcode::G_ZEXTLOAD:
546 case TargetOpcode::G_SEXTLOAD: {
548 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
557 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
558 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
566 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
567 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
580 case TargetOpcode::G_SELECT: {
584 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
585 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
586 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
592 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
593 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
594 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
600 case TargetOpcode::G_UADDE:
601 case TargetOpcode::G_USUBE:
602 case TargetOpcode::G_SADDE:
603 case TargetOpcode::G_SSUBE: {
607 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
608 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
609 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
610 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
611 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
617 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
618 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
619 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
620 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
625 case AMDGPU::G_BRCOND: {
626 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
631 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
637 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
642 case AMDGPU::G_INTRINSIC:
643 case AMDGPU::G_INTRINSIC_CONVERGENT:
645 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
646 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
661 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
662 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
664 MRI->setRegBank(LoLHS, *Bank);
665 MRI->setRegBank(HiLHS, *Bank);
670 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
681 MRI.setType(Reg, NewTy);
701 LLT Ty =
MRI.getType(Src);
704 if (Bank == &AMDGPU::SGPRRegBank)
710 if (Bank != &AMDGPU::VGPRRegBank) {
712 Src =
B.buildCopy(Ty, Src).getReg(0);
713 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
717 unsigned NumParts = Bits / 32;
724 auto Unmerge =
B.buildUnmerge(S32, Src);
725 for (
unsigned i = 0; i < NumParts; ++i)
729 for (
unsigned i = 0; i < NumParts; ++i) {
731 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
732 MRI.setType(DstPart, NumParts == 1 ? Ty : S32);
737 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
739 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
747 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
748 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
781 const unsigned MovExecOpc =
783 const unsigned MovExecTermOpc =
787 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
789 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
791 AMDGPU::EXEC_LO : AMDGPU::EXEC;
794 const int OrigRangeSize = std::distance(Range.begin(), Range.end());
798 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
799 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
802 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
803 .addDef(InitSaveExecReg);
805 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
806 Register NewExec =
MRI.createVirtualRegister(WaveRC);
832 B.setInsertPt(*LoopBB, LoopBB->
end());
834 B.buildInstr(TargetOpcode::PHI)
836 .addReg(InitSaveExecReg)
851 auto NewEnd = BodyBB->
end();
858 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
863 if (!SGPROperandRegs.
count(OldReg))
868 auto OldVal = WaterfalledRegMap.
find(OldReg);
869 if (OldVal != WaterfalledRegMap.
end()) {
870 Op.setReg(OldVal->second);
875 LLT OpTy =
MRI.getType(OpReg);
878 if (OpBank != &AMDGPU::VGPRRegBank) {
881 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
882 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
890 bool Is64 = OpSize % 64 == 0;
891 unsigned PartSize = Is64 ? 64 : 32;
893 unsigned NumParts = OpSize / PartSize;
899 CurrentLaneParts.
push_back(CurrentLaneReg);
901 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
902 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
903 for (
unsigned i = 0; i < NumParts; ++i) {
905 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
906 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
907 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
911 for (
unsigned i = 0; i < NumParts; ++i) {
913 OpParts[i]).getReg(0);
914 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
919 CondReg =
B.buildAnd(S1, CondReg, CmpReg).getReg(0);
920 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
924 Op.setReg(CurrentLaneReg);
927 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
932 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
936 MRI.setRegClass(CondReg, WaveRC);
939 B.buildInstr(AndSaveExecOpc)
943 MRI.setSimpleHint(NewExec, CondReg);
945 B.setInsertPt(*BodyBB, BodyBB->
end());
948 B.buildInstr(XorTermOpc)
957 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
964 B.setMBB(*RestoreExecBB);
965 B.buildInstr(MovExecTermOpc)
967 .addReg(SaveExecReg);
971 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
983 for (
unsigned Op : OpIndices) {
987 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
988 SGPROperandRegs.
insert(Reg);
992 return !SGPROperandRegs.
empty();
1012 Register Reg =
MI.getOperand(OpIdx).getReg();
1015 if (Bank == &AMDGPU::SGPRRegBank)
1019 MI.getOperand(OpIdx).setReg(Reg);
1031 assert(FirstSize % EltSize == 0);
1033 unsigned FirstPartNumElts = FirstSize / EltSize;
1034 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1055 const LLT LoadTy =
MRI.getType(DstReg);
1057 const unsigned MaxNonSmrdLoadSize = 128;
1061 if (DstBank == &AMDGPU::SGPRRegBank) {
1064 if (LoadSize != 32 && LoadSize != 96)
1068 const unsigned MemSize = 8 * MMO->
getSize();
1073 if (LoadSize == 32 &&
1079 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1081 if (LoadSize == 32) {
1085 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1087 auto WideLoad =
B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
1088 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1089 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1091 auto WideLoad =
B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
1092 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1095 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1109 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1111 B.buildTrunc(
MI.getOperand(0), WideLoad);
1113 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1119 MI.eraseFromParent();
1124 if (LoadSize <= MaxNonSmrdLoadSize)
1130 if (SrcRegs.
empty())
1133 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1138 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1139 MRI.setType(BasePtrReg, PtrTy);
1141 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1142 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1143 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1154 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1165 const auto &TFI = *ST.getFrameLowering();
1173 Register AllocSize =
MI.getOperand(1).getReg();
1179 if (SizeBank != &AMDGPU::SGPRRegBank)
1182 LLT PtrTy =
MRI.getType(Dst);
1187 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1189 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1190 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1192 auto SPCopy =
B.buildCopy(PtrTy, SPReg);
1193 if (Alignment > TFI.getStackAlign()) {
1194 auto PtrAdd =
B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
1195 B.buildMaskLowPtrBits(Dst, PtrAdd,
1196 Log2(Alignment) + ST.getWavefrontSizeLog2());
1198 B.buildPtrAdd(Dst, SPCopy, ScaledSize);
1201 MI.eraseFromParent();
1208 int RsrcIdx)
const {
1209 const int NumDefs =
MI.getNumExplicitDefs();
1213 RsrcIdx += NumDefs + 1;
1220 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1221 if (!
MI.getOperand(
I).isReg())
1225 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1237 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1241 if (std::optional<int64_t> Imm =
1245 VOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1246 SOffsetReg =
B.buildConstant(S32, SOffset).getReg(0);
1247 InstOffsetVal = ImmOffset;
1249 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1250 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1251 return SOffset + ImmOffset;
1266 SOffsetReg =
B.buildConstant(S32, SOffset).getReg(0);
1267 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1268 InstOffsetVal = ImmOffset;
1274 VOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1275 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1277 InstOffsetVal = ImmOffset;
1291 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1297 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1307 VOffsetReg = CombinedOffset;
1309 VOffsetReg =
B.buildCopy(S32, CombinedOffset).getReg(0);
1310 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1313 SOffsetReg =
B.buildConstant(S32, 0).getReg(0);
1314 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1325 LLT Ty =
MRI.getType(Dst);
1331 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1332 OffsetBank == &AMDGPU::SGPRRegBank)
1340 if (LoadSize == 256 || LoadSize == 512) {
1341 NumLoads = LoadSize / 128;
1342 Ty = Ty.
divide(NumLoads);
1347 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1353 int64_t ImmOffset = 0;
1356 SOffset, ImmOffset, Alignment);
1361 const Align MemAlign(4);
1374 Register VIndex =
B.buildConstant(S32, 0).getReg(0);
1375 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1382 for (
int i = 0; i < NumLoads; ++i) {
1383 if (NumLoads == 1) {
1386 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1387 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1394 B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1395 .addDef(LoadParts[i])
1400 .addImm(ImmOffset + 16 * i)
1403 .addMemOperand(MMO);
1409 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1412 B.setInstr(*Span.
begin());
1413 MI.eraseFromParent();
1417 OpsToWaterfall.
insert(RSrc);
1422 if (NumLoads != 1) {
1424 B.buildConcatVectors(Dst, LoadParts);
1426 B.buildMergeLikeInstr(Dst, LoadParts);
1430 if (RSrcBank == &AMDGPU::SGPRRegBank)
1431 MI.eraseFromParent();
1446 LLT Ty =
MRI.getType(DstReg);
1450 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1451 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1452 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1453 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1457 if (DstBank == &AMDGPU::VGPRRegBank) {
1463 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1467 auto ShiftOffset =
Signed ?
B.buildAShr(S64, SrcReg, OffsetReg)
1468 :
B.buildLShr(S64, SrcReg, OffsetReg);
1469 auto UnmergeSOffset =
B.buildUnmerge({S32, S32}, ShiftOffset);
1476 auto Zero =
B.buildConstant(S32, 0);
1477 auto WidthImm = ConstWidth->Value.getZExtValue();
1478 if (WidthImm <= 32) {
1482 Signed ?
B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1483 :
B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1485 Signed ?
B.buildAShr(S32, Extract,
B.buildConstant(S32, 31)) : Zero;
1486 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1490 auto UpperWidth =
B.buildConstant(S32, WidthImm - 32);
1493 ?
B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1494 :
B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1495 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1497 MI.eraseFromParent();
1503 auto ExtShift =
B.buildSub(S32,
B.buildConstant(S32, 64), WidthReg);
1504 auto SignBit =
B.buildShl(S64, ShiftOffset, ExtShift);
1506 B.buildAShr(S64, SignBit, ExtShift);
1508 B.buildLShr(S64, SignBit, ExtShift);
1509 MI.eraseFromParent();
1515 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1518 auto OffsetMask =
B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
1519 auto ClampOffset =
B.buildAnd(S32, OffsetReg, OffsetMask);
1522 auto ShiftWidth =
B.buildShl(S32, WidthReg,
B.buildConstant(S32, 16));
1527 auto MergedInputs =
B.buildOr(S32, ClampOffset, ShiftWidth);
1531 unsigned Opc = Ty == S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1532 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1534 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1538 MI.eraseFromParent();
1556 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1559 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1563 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1564 bool Accumulate =
true;
1573 Register DstLo =
B.buildMul(S32, Src0, Src1).getReg(0);
1574 bool MulHiInVgpr =
false;
1576 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1579 DstHi = IsUnsigned ?
B.buildUMulH(S32, Src0, Src1).getReg(0)
1580 :
B.buildSMulH(S32, Src0, Src1).getReg(0);
1581 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1583 Register VSrc0 =
B.buildCopy(S32, Src0).getReg(0);
1584 Register VSrc1 =
B.buildCopy(S32, Src1).getReg(0);
1586 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1587 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1589 DstHi = IsUnsigned ?
B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
1590 :
B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
1591 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1607 LLT CarryType = DstOnValu ? S1 : S32;
1609 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1611 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1616 Zero =
B.buildConstant(S32, 0).getReg(0);
1617 MRI.setRegBank(Zero,
1618 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1622 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1623 : AMDGPU::SGPRRegBank);
1625 if (DstOnValu && !MulHiInVgpr) {
1626 Carry =
B.buildTrunc(S1, Carry).getReg(0);
1627 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1633 DstLo =
B.buildCopy(S32, DstLo).getReg(0);
1634 DstHi =
B.buildCopy(S32, DstHi).getReg(0);
1635 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1636 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1639 auto Unmerge =
B.buildUnmerge(S32, Src2);
1640 Register Src2Lo = Unmerge.getReg(0);
1641 Register Src2Hi = Unmerge.getReg(1);
1642 MRI.setRegBank(Src2Lo, DstBank);
1643 MRI.setRegBank(Src2Hi, DstBank);
1647 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1649 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1650 MRI.setRegBank(Carry, CarryBank);
1653 auto AddLo =
B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
1654 DstLo = AddLo.getReg(0);
1655 Register CarryLo = AddLo.getReg(1);
1656 MRI.setRegBank(DstLo, DstBank);
1657 MRI.setRegBank(CarryLo, CarryBank);
1659 auto AddHi =
B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
1660 DstHi = AddHi.getReg(0);
1661 MRI.setRegBank(DstHi, DstBank);
1663 Register CarryHi = AddHi.getReg(1);
1664 MRI.setRegBank(CarryHi, CarryBank);
1669 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1670 MRI.setRegBank(Carry, CarryBank);
1674 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1675 MRI.setRegBank(Carry, CarryBank);
1679 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1682 B.buildCopy(Dst1, Carry);
1684 B.buildTrunc(Dst1, Carry);
1687 MI.eraseFromParent();
1694 case TargetOpcode::G_ASHR:
1695 case TargetOpcode::G_SMIN:
1696 case TargetOpcode::G_SMAX:
1697 return TargetOpcode::G_SEXT;
1698 case TargetOpcode::G_LSHR:
1699 case TargetOpcode::G_UMIN:
1700 case TargetOpcode::G_UMAX:
1701 return TargetOpcode::G_ZEXT;
1703 return TargetOpcode::G_ANYEXT;
1709static std::pair<Register, Register>
1712 auto Bitcast =
B.buildBitcast(S32, Src);
1714 if (ExtOpcode == TargetOpcode::G_SEXT) {
1715 auto ExtLo =
B.buildSExtInReg(S32, Bitcast, 16);
1716 auto ShiftHi =
B.buildAShr(S32, Bitcast,
B.buildConstant(S32, 16));
1717 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1720 auto ShiftHi =
B.buildLShr(S32, Bitcast,
B.buildConstant(S32, 16));
1721 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1722 auto ExtLo =
B.buildAnd(S32, Bitcast,
B.buildConstant(S32, 0xffff));
1723 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1726 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1727 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1735 if (!SrcReg.
empty()) {
1752 LLT StoreVT =
MRI.getType(Reg);
1756 auto Unmerge =
B.buildUnmerge(S16, Reg);
1760 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
1770static std::pair<Register, unsigned>
1774 return std::pair(
Register(), Const);
1778 return std::pair(
Base, Const);
1781 return std::pair(Reg, 0);
1784std::pair<Register, unsigned>
1797 if (ImmOffset != 0) {
1806 unsigned Overflow = ImmOffset & ~MaxImm;
1807 ImmOffset -= Overflow;
1808 if ((int32_t)Overflow < 0) {
1809 Overflow += ImmOffset;
1814 if (Overflow != 0) {
1816 BaseReg =
B.buildConstant(S32, Overflow).getReg(0);
1818 auto OverflowVal =
B.buildConstant(S32, Overflow);
1819 BaseReg =
B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
1825 BaseReg =
B.buildConstant(S32, 0).getReg(0);
1827 return {BaseReg, C1};
1833 LLT SrcTy =
MRI.getType(SrcReg);
1836 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1843 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1844 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1846 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1848 .addUse(SrcReg, 0, AMDGPU::sub0);
1849 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1851 .addUse(SrcReg, 0, AMDGPU::sub1);
1852 B.buildInstr(AMDGPU::REG_SEQUENCE)
1855 .addImm(AMDGPU::sub0)
1857 .addImm(AMDGPU::sub1);
1868 unsigned ConstOffset) {
1874 auto MaterializedOffset =
B.buildConstant(S32, ConstOffset);
1876 auto Add =
B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
1877 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1878 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1890 bool IsBooleanSrc =
false) {
1891 if (ExtOpc == AMDGPU::G_ZEXT) {
1892 B.buildConstant(Hi32Reg, 0);
1893 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1897 B.buildCopy(Hi32Reg, Lo32Reg);
1901 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1902 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1905 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1906 B.buildUndef(Hi32Reg);
1910bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1912 const OperandsMapper &OpdMapper)
const {
1919 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1921 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1923 LLT VecTy =
MRI.getType(VecReg);
1934 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1936 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1939 (DstBank == AMDGPU::SGPRRegBank &&
1940 SrcBank == AMDGPU::SGPRRegBank &&
1941 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1942 : AMDGPU::VCCRegBank;
1943 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 :
LLT::scalar(1);
1945 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1946 Idx =
B.buildCopy(S32,
Idx)->getOperand(0).getReg();
1947 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1952 unsigned NumLanes = DstRegs.size();
1956 EltTy =
MRI.getType(DstRegs[0]);
1958 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1960 for (
unsigned L = 0;
L < NumLanes; ++
L)
1961 Res[L] = UnmergeToEltTy.getReg(L);
1963 for (
unsigned I = 1;
I < NumElem; ++
I) {
1964 auto IC =
B.buildConstant(S32,
I);
1965 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
1967 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
1969 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1970 auto S =
B.buildSelect(EltTy, Cmp,
1971 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
1973 for (
unsigned N : { 0, 2, 3 })
1974 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
1976 Res[
L] = S->getOperand(0).getReg();
1980 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1981 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
1982 B.buildCopy(DstReg, Res[L]);
1983 MRI.setRegBank(DstReg, DstBank);
1986 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
1987 MI.eraseFromParent();
1998 if (CurrBank && *CurrBank != Bank) {
1999 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2000 MRI.setRegBank(Copy, Bank);
2004 MRI.setRegBank(Reg, Bank);
2008bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2010 const OperandsMapper &OpdMapper)
const {
2017 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2019 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2021 LLT VecTy =
MRI.getType(VecReg);
2032 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2034 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2036 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2039 (DstBank == AMDGPU::SGPRRegBank &&
2040 SrcBank == AMDGPU::SGPRRegBank &&
2041 InsBank == AMDGPU::SGPRRegBank &&
2042 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2043 : AMDGPU::VCCRegBank;
2044 LLT CCTy = (CCBank == AMDGPU::SGPRRegBank) ? S32 :
LLT::scalar(1);
2046 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2047 Idx =
B.buildCopy(S32,
Idx)->getOperand(0).getReg();
2048 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2053 unsigned NumLanes = InsRegs.size();
2056 InsRegs.push_back(
MI.getOperand(2).getReg());
2058 EltTy =
MRI.getType(InsRegs[0]);
2061 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2064 for (
unsigned I = 0;
I < NumElem; ++
I) {
2065 auto IC =
B.buildConstant(S32,
I);
2066 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2068 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2070 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2072 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2083 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2084 B.buildBuildVector(
MI.getOperand(0), Ops);
2086 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2087 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2088 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2091 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2092 MI.eraseFromParent();
2100 B.setInstrAndDebugLoc(
MI);
2101 unsigned Opc =
MI.getOpcode();
2104 case AMDGPU::G_CONSTANT:
2105 case AMDGPU::G_IMPLICIT_DEF: {
2107 LLT DstTy =
MRI.getType(DstReg);
2113 if (DstBank == &AMDGPU::VCCRegBank)
2116 if (DefRegs.
empty())
2119 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2122 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2124 MI.getOperand(0).setReg(NewDstReg);
2125 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2126 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2127 MI.getOperand(1).setCImm(
2131 MRI.setRegBank(NewDstReg, *DstBank);
2132 B.buildTrunc(DefRegs[0], NewDstReg);
2135 case AMDGPU::G_PHI: {
2137 LLT DstTy =
MRI.getType(DstReg);
2144 if (DstBank == &AMDGPU::VCCRegBank) {
2151 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
2155 if (SrcBank != &AMDGPU::VCCRegBank) {
2160 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2161 MI.getOperand(
I).setReg(Copy.getReg(0));
2172 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2173 B.setInsertPt(
B.getMBB(),
MI);
2181 case AMDGPU::G_FCMP:
2185 case AMDGPU::G_ICMP:
2186 case AMDGPU::G_UADDO:
2187 case AMDGPU::G_USUBO:
2188 case AMDGPU::G_UADDE:
2189 case AMDGPU::G_SADDE:
2190 case AMDGPU::G_USUBE:
2191 case AMDGPU::G_SSUBE: {
2192 unsigned BoolDstOp =
2193 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
2194 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2198 if (DstBank != &AMDGPU::SGPRRegBank)
2201 const bool HasCarryIn =
MI.getNumOperands() == 5;
2206 Register NewDstReg =
MRI.createGenericVirtualRegister(S32);
2207 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2208 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2211 Register NewSrcReg =
MRI.createGenericVirtualRegister(S32);
2212 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2213 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2214 MI.getOperand(4).setReg(NewSrcReg);
2218 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2223 if (DefRegs.
empty())
2225 B.buildTrunc(DefRegs[0], NewDstReg);
2228 case AMDGPU::G_SELECT: {
2230 LLT DstTy =
MRI.getType(DstReg);
2233 if (CondRegs.
empty())
2240 if (CondBank == &AMDGPU::SGPRRegBank) {
2242 Register NewCondReg =
MRI.createGenericVirtualRegister(S32);
2243 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2245 MI.getOperand(1).setReg(NewCondReg);
2246 B.buildZExt(NewCondReg, CondRegs[0]);
2259 if (DefRegs.
empty()) {
2264 if (Src1Regs.
empty())
2270 if (Src2Regs.
empty())
2277 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
2278 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
2280 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2281 MI.eraseFromParent();
2284 case AMDGPU::G_BRCOND: {
2285 Register CondReg =
MI.getOperand(0).getReg();
2290 if (CondBank == &AMDGPU::SGPRRegBank) {
2292 Register NewCondReg =
MRI.createGenericVirtualRegister(S32);
2293 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2295 MI.getOperand(0).setReg(NewCondReg);
2296 B.buildZExt(NewCondReg, CondReg);
2304 case AMDGPU::G_XOR: {
2308 LLT DstTy =
MRI.getType(DstReg);
2313 if (DstBank == &AMDGPU::VCCRegBank)
2317 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2335 if (DefRegs.
empty()) {
2342 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2348 if (Src0Regs.
empty())
2353 if (Src1Regs.
empty())
2360 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
2361 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
2363 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2364 MI.eraseFromParent();
2367 case AMDGPU::G_ABS: {
2373 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2375 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2388 case AMDGPU::G_LSHR:
2389 case AMDGPU::G_ASHR:
2390 case AMDGPU::G_SMIN:
2391 case AMDGPU::G_SMAX:
2392 case AMDGPU::G_UMIN:
2393 case AMDGPU::G_UMAX: {
2395 LLT DstTy =
MRI.getType(DstReg);
2404 if (DstBank == &AMDGPU::VGPRRegBank)
2410 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2412 if (DstTy.
isVector() && Opc == AMDGPU::G_ABS) {
2415 std::tie(WideSrcLo, WideSrcHi) =
2417 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
2418 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
2419 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2420 MI.eraseFromParent();
2429 std::tie(WideSrc0Lo, WideSrc0Hi)
2431 std::tie(WideSrc1Lo, WideSrc1Hi)
2433 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2434 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2435 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2436 MI.eraseFromParent();
2444 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2445 Opc == AMDGPU::G_ASHR) {
2446 B.setInsertPt(*
MBB,
MI.getIterator());
2454 case AMDGPU::G_SEXT_INREG: {
2456 if (SrcRegs.
empty())
2460 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2467 int Amt =
MI.getOperand(2).getImm();
2473 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2475 auto Freeze =
B.buildFreeze(S32, SrcRegs[0]);
2477 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2480 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(S32, 31));
2484 B.buildCopy(DstRegs[0], SrcRegs[0]);
2485 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2489 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2490 MI.eraseFromParent();
2493 case AMDGPU::G_CTPOP:
2494 case AMDGPU::G_BITREVERSE: {
2497 if (DstBank == &AMDGPU::SGPRRegBank)
2502 LLT Ty =
MRI.getType(SrcReg);
2506 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2515 case AMDGPU::G_AMDGPU_FFBH_U32:
2516 case AMDGPU::G_AMDGPU_FFBL_B32:
2517 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2518 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2521 if (DstBank == &AMDGPU::SGPRRegBank)
2526 LLT Ty =
MRI.getType(SrcReg);
2536 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2538 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2539 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2540 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2541 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2543 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2544 auto X =
B.buildInstr(NewOpc, {S32}, {SrcRegs[
Idx]});
2545 auto Y =
B.buildInstr(NewOpc, {S32}, {SrcRegs[
Idx ^ 1]});
2547 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2549 : AMDGPU::G_UADDSAT;
2550 Y =
B.buildInstr(AddOpc, {S32}, {
Y,
B.buildConstant(S32, 32)});
2552 B.buildUMin(DstReg,
X,
Y);
2553 MI.eraseFromParent();
2556 case AMDGPU::G_SEXT:
2557 case AMDGPU::G_ZEXT:
2558 case AMDGPU::G_ANYEXT: {
2560 LLT SrcTy =
MRI.getType(SrcReg);
2561 const bool Signed = Opc == AMDGPU::G_SEXT;
2569 LLT DstTy =
MRI.getType(DstReg);
2571 SrcBank != &AMDGPU::SGPRRegBank &&
2572 SrcBank != &AMDGPU::VCCRegBank &&
2582 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2583 }
else if (Opc == AMDGPU::G_ZEXT) {
2584 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2586 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2590 MRI.setRegBank(DstReg, *SrcBank);
2591 MI.eraseFromParent();
2601 if (SrcBank == &AMDGPU::VCCRegBank) {
2608 const bool UseSel64 = DstSize > 32 &&
2609 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2613 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2614 auto False =
B.buildConstant(SelType, 0);
2616 MRI.setRegBank(True.getReg(0), *DstBank);
2617 MRI.setRegBank(False.getReg(0), *DstBank);
2618 MRI.setRegBank(DstReg, *DstBank);
2621 B.buildSelect(DefRegs[0], SrcReg, True, False);
2623 }
else if (DstSize < 32) {
2624 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2625 MRI.setRegBank(Sel.getReg(0), *DstBank);
2626 B.buildTrunc(DstReg, Sel);
2628 B.buildSelect(DstReg, SrcReg, True, False);
2631 MI.eraseFromParent();
2637 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2646 LLT DstTy =
MRI.getType(DstReg);
2647 LLT SrcTy =
MRI.getType(SrcReg);
2649 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2661 unsigned ConstOffset;
2662 std::tie(BaseIdxReg, ConstOffset) =
2669 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2674 if (ShouldMoveIndexIntoLoop)
2675 MI.getOperand(2).setReg(BaseIdxReg);
2681 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2682 SrcBank == &AMDGPU::SGPRRegBank;
2683 if (DstRegs.
empty()) {
2688 if (NeedCopyToVGPR) {
2690 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2691 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2692 MI.getOperand(0).setReg(TmpReg);
2693 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2700 if (ShouldMoveIndexIntoLoop)
2710 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2711 auto One =
B.buildConstant(S32, 1);
2722 auto IdxLo =
B.buildShl(S32, BaseIdxReg, One);
2723 auto IdxHi =
B.buildAdd(S32, IdxLo, One);
2725 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2726 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2728 MRI.setRegBank(DstReg, *DstBank);
2729 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2730 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2731 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2732 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2736 MI.eraseFromParent();
2742 B.setInstr(*Span.
begin());
2743 MI.eraseFromParent();
2747 if (NeedCopyToVGPR) {
2749 Register TmpReg0 =
MRI.createGenericVirtualRegister(S32);
2750 Register TmpReg1 =
MRI.createGenericVirtualRegister(S32);
2751 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2752 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2754 Extract0->getOperand(0).setReg(TmpReg0);
2755 Extract1->getOperand(0).setReg(TmpReg1);
2763 if (ShouldMoveIndexIntoLoop)
2768 case AMDGPU::G_INSERT_VECTOR_ELT: {
2772 LLT VecTy =
MRI.getType(DstReg);
2778 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2780 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2788 LLT InsTy =
MRI.getType(InsReg);
2792 unsigned ConstOffset;
2793 std::tie(BaseIdxReg, ConstOffset) =
2800 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2805 if (ShouldMoveIndexIntoLoop)
2806 MI.getOperand(3).setReg(BaseIdxReg);
2809 if (InsRegs.
empty()) {
2813 if (ShouldMoveIndexIntoLoop) {
2825 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2826 auto One =
B.buildConstant(S32, 1);
2835 auto IdxLo =
B.buildShl(S32, BaseIdxReg, One);
2836 auto IdxHi =
B.buildAdd(S32, IdxLo, One);
2838 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
2839 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
2848 MRI.setRegBank(InsReg, *InsSrcBank);
2849 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2850 MRI.setRegBank(InsLo.getReg(0), *DstBank);
2851 MRI.setRegBank(InsHi.getReg(0), *DstBank);
2852 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2853 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2854 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2859 B.setInsertPt(
B.getMBB(),
MI);
2860 B.buildBitcast(DstReg, InsHi);
2861 MI.eraseFromParent();
2865 B.setInstr(*Span.
begin());
2866 MI.eraseFromParent();
2877 B.buildBitcast(DstReg, InsHi);
2880 if (ShouldMoveIndexIntoLoop)
2885 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
2886 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
2887 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
2888 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
2889 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
2890 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
2891 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
2892 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
2893 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
2894 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
2895 case AMDGPU::G_AMDGPU_BUFFER_STORE:
2896 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
2897 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
2898 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
2899 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
2900 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
2901 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
2906 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
2907 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
2908 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
2909 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
2910 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
2911 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
2912 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
2913 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
2914 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
2915 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
2916 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
2917 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
2922 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
2923 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
2924 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
2929 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
2934 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
2938 case AMDGPU::G_INTRINSIC:
2939 case AMDGPU::G_INTRINSIC_CONVERGENT: {
2941 case Intrinsic::amdgcn_readlane: {
2952 case Intrinsic::amdgcn_writelane: {
2962 case Intrinsic::amdgcn_interp_p1:
2963 case Intrinsic::amdgcn_interp_p2:
2964 case Intrinsic::amdgcn_interp_mov:
2965 case Intrinsic::amdgcn_interp_p1_f16:
2966 case Intrinsic::amdgcn_interp_p2_f16:
2967 case Intrinsic::amdgcn_lds_param_load: {
2975 case Intrinsic::amdgcn_interp_inreg_p10:
2976 case Intrinsic::amdgcn_interp_inreg_p2:
2977 case Intrinsic::amdgcn_interp_inreg_p10_f16:
2978 case Intrinsic::amdgcn_interp_inreg_p2_f16:
2981 case Intrinsic::amdgcn_permlane16:
2982 case Intrinsic::amdgcn_permlanex16: {
2990 case Intrinsic::amdgcn_sbfe:
2993 case Intrinsic::amdgcn_ubfe:
2996 case Intrinsic::amdgcn_inverse_ballot:
3000 case Intrinsic::amdgcn_ballot:
3006 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3007 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3008 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3009 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3019 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3020 unsigned N =
MI.getNumExplicitOperands() - 2;
3025 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3026 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3027 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3029 case Intrinsic::amdgcn_ds_ordered_add:
3030 case Intrinsic::amdgcn_ds_ordered_swap: {
3037 case Intrinsic::amdgcn_ds_gws_init:
3038 case Intrinsic::amdgcn_ds_gws_barrier:
3039 case Intrinsic::amdgcn_ds_gws_sema_br: {
3045 case Intrinsic::amdgcn_ds_gws_sema_v:
3046 case Intrinsic::amdgcn_ds_gws_sema_p:
3047 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3052 case Intrinsic::amdgcn_ds_append:
3053 case Intrinsic::amdgcn_ds_consume: {
3057 case Intrinsic::amdgcn_s_sendmsg:
3058 case Intrinsic::amdgcn_s_sendmsghalt: {
3063 case Intrinsic::amdgcn_s_setreg: {
3067 case Intrinsic::amdgcn_raw_buffer_load_lds:
3068 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3075 case Intrinsic::amdgcn_struct_buffer_load_lds:
3076 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3083 case Intrinsic::amdgcn_global_load_lds: {
3088 case Intrinsic::amdgcn_lds_direct_load: {
3094 case Intrinsic::amdgcn_exp_row:
3104 if (RSrcIntrin->IsImage) {
3115 case AMDGPU::G_SI_CALL: {
3126 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3127 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3133 unsigned NonCopyInstrsLen = 0;
3139 while (Start->getOpcode() != FrameSetupOpcode) {
3141 bool IsCopy =
false;
3142 if (Start->getOpcode() == AMDGPU::COPY) {
3143 auto &Dst = Start->getOperand(0);
3146 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3151 auto &Src = Start->getOperand(1);
3154 IsCopy =
Info->getScratchRSrcReg() == Reg;
3162 NonCopyInstrsLen = NonCopyInstrs.
size();
3167 NonCopyInstrs.
resize(NonCopyInstrsLen);
3169 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3175 NonCopyInstrs.
clear();
3176 NonCopyInstrsLen = 0;
3179 while (
End->getOpcode() != FrameDestroyOpcode) {
3181 bool IsCopy =
false;
3182 if (
End->getOpcode() == AMDGPU::COPY) {
3183 auto &Src =
End->getOperand(1);
3186 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3192 NonCopyInstrsLen = NonCopyInstrs.
size();
3197 NonCopyInstrs.
resize(NonCopyInstrsLen);
3201 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3206 B.setInsertPt(
B.getMBB(), Start);
3210 case AMDGPU::G_LOAD:
3211 case AMDGPU::G_ZEXTLOAD:
3212 case AMDGPU::G_SEXTLOAD: {
3217 case AMDGPU::G_DYN_STACKALLOC:
3220 case AMDGPU::G_STACKRESTORE: {
3225 case AMDGPU::G_SBFX:
3228 case AMDGPU::G_UBFX:
3231 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3232 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3247 if (RB0 == AMDGPU::InvalidRegBankID)
3249 if (RB1 == AMDGPU::InvalidRegBankID)
3252 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3253 return AMDGPU::SGPRRegBankID;
3255 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3256 return AMDGPU::AGPRRegBankID;
3258 return AMDGPU::VGPRRegBankID;
3262 if (RB0 == AMDGPU::InvalidRegBankID)
3264 if (RB1 == AMDGPU::InvalidRegBankID)
3270 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3271 return AMDGPU::VCCRegBankID;
3279 unsigned RegBank = AMDGPU::InvalidRegBankID;
3287 if (RegBank == AMDGPU::VGPRRegBankID)
3303 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3316 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3322 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3325 MI.getNumOperands());
3338 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3344 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3345 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3349 MI.getNumOperands());
3358 for (
unsigned I = 0,
E =
MI.getNumOperands();
I !=
E; ++
I) {
3364 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3368 MI.getNumOperands());
3374 int RsrcIdx)
const {
3377 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3379 const int NumOps =
MI.getNumOperands();
3384 for (
int I = 0;
I != NumOps; ++
I) {
3385 if (!
MI.getOperand(
I).isReg())
3399 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3404 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3407 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3418 LLT PtrTy =
MRI.getType(PtrReg);
3422 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3427 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3438 LLT PtrTy =
MRI.getType(PtrReg);
3450 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3451 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3453 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3458 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3460 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3463 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3464 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3467 OpdsMapping[0] = ValMapping;
3468 OpdsMapping[1] = PtrMapping;
3493 return AMDGPU::getValueMapping(Bank,
Size);
3501 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3509 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3526 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3533 assert(SrcBank &&
"src bank should have been assigned already");
3538 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3543 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3545 OpdsMapping[0] = &ValMap;
3546 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3547 OpdsMapping[1] = &ValMap;
3554 if (
MI.isRegSequence()) {
3557 unsigned BankID = AMDGPU::SGPRRegBankID;
3559 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3563 if (OpBank != AMDGPU::SGPRRegBankID) {
3564 BankID = AMDGPU::VGPRRegBankID;
3580 if (
MI.getOpcode() == TargetOpcode::G_PHI) {
3581 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3586 ResultBank = DstBank->
getID();
3588 for (
unsigned I = 1,
E =
MI.getNumOperands();
I !=
E;
I += 2) {
3593 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3594 ResultBank = AMDGPU::VGPRRegBankID;
3599 unsigned OpBank = Bank->
getID();
3603 assert(ResultBank != AMDGPU::InvalidRegBankID);
3605 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3620 switch (
MI.getOpcode()) {
3626 case AMDGPU::G_XOR: {
3627 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3632 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3633 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3634 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3636 TargetBankID = DstBank->
getID();
3637 if (DstBank == &AMDGPU::VCCRegBank) {
3638 TargetBankID = AMDGPU::VCCRegBankID;
3639 BankLHS = AMDGPU::VCCRegBankID;
3640 BankRHS = AMDGPU::VCCRegBankID;
3643 AMDGPU::SGPRRegBankID);
3645 AMDGPU::SGPRRegBankID);
3649 AMDGPU::VCCRegBankID);
3651 AMDGPU::VCCRegBankID);
3654 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3655 TargetBankID = AMDGPU::VGPRRegBankID;
3656 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3657 TargetBankID = AMDGPU::VCCRegBankID;
3658 BankLHS = AMDGPU::VCCRegBankID;
3659 BankRHS = AMDGPU::VCCRegBankID;
3660 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3661 TargetBankID = AMDGPU::SGPRRegBankID;
3665 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3666 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3667 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3674 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3675 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3677 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3679 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3682 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3690 case AMDGPU::G_PTR_ADD:
3691 case AMDGPU::G_PTRMASK:
3696 case AMDGPU::G_LSHR:
3697 case AMDGPU::G_ASHR:
3698 case AMDGPU::G_UADDO:
3699 case AMDGPU::G_USUBO:
3700 case AMDGPU::G_UADDE:
3701 case AMDGPU::G_SADDE:
3702 case AMDGPU::G_USUBE:
3703 case AMDGPU::G_SSUBE:
3704 case AMDGPU::G_SMIN:
3705 case AMDGPU::G_SMAX:
3706 case AMDGPU::G_UMIN:
3707 case AMDGPU::G_UMAX:
3709 case AMDGPU::G_SHUFFLE_VECTOR:
3710 case AMDGPU::G_SBFX:
3711 case AMDGPU::G_UBFX:
3715 case AMDGPU::G_FADD:
3716 case AMDGPU::G_FSUB:
3717 case AMDGPU::G_FMUL:
3719 case AMDGPU::G_FFLOOR:
3720 case AMDGPU::G_FCEIL:
3721 case AMDGPU::G_FRINT:
3722 case AMDGPU::G_FMINNUM:
3723 case AMDGPU::G_FMAXNUM:
3724 case AMDGPU::G_INTRINSIC_TRUNC:
3725 case AMDGPU::G_STRICT_FADD:
3726 case AMDGPU::G_STRICT_FSUB:
3727 case AMDGPU::G_STRICT_FMUL:
3728 case AMDGPU::G_STRICT_FMA: {
3729 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3735 case AMDGPU::G_FPTOSI:
3736 case AMDGPU::G_FPTOUI:
3737 case AMDGPU::G_SITOFP:
3738 case AMDGPU::G_UITOFP: {
3739 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3740 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3746 case AMDGPU::G_FPTRUNC:
3747 case AMDGPU::G_FPEXT: {
3748 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3749 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3755 case AMDGPU::G_SADDSAT:
3756 case AMDGPU::G_SSUBSAT:
3757 case AMDGPU::G_UADDSAT:
3758 case AMDGPU::G_USUBSAT:
3759 case AMDGPU::G_FMAD:
3760 case AMDGPU::G_FSQRT:
3761 case AMDGPU::G_FEXP2:
3762 case AMDGPU::G_FLOG2:
3763 case AMDGPU::G_FLDEXP:
3764 case AMDGPU::G_FMINNUM_IEEE:
3765 case AMDGPU::G_FMAXNUM_IEEE:
3766 case AMDGPU::G_FCANONICALIZE:
3767 case AMDGPU::G_STRICT_FLDEXP:
3768 case AMDGPU::G_BSWAP:
3769 case AMDGPU::G_FSHR:
3770 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
3771 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
3772 case AMDGPU::G_AMDGPU_RCP_IFLAG:
3773 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
3774 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
3775 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
3776 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
3777 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
3778 case AMDGPU::G_AMDGPU_SMED3:
3779 case AMDGPU::G_AMDGPU_FMED3:
3781 case AMDGPU::G_UMULH:
3782 case AMDGPU::G_SMULH: {
3787 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3788 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
3797 bool AllSalu =
true;
3798 bool MulSalu =
true;
3799 for (
unsigned i = 0; i < 5; ++i) {
3802 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
3804 if (i == 2 || i == 3) {
3822 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
3823 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
3824 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3825 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3826 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
3829 case AMDGPU::G_IMPLICIT_DEF: {
3830 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3831 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3834 case AMDGPU::G_FCONSTANT:
3835 case AMDGPU::G_CONSTANT:
3836 case AMDGPU::G_GLOBAL_VALUE:
3837 case AMDGPU::G_BLOCK_ADDR:
3838 case AMDGPU::G_READCYCLECOUNTER: {
3839 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3840 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3843 case AMDGPU::G_FRAME_INDEX: {
3846 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3847 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3850 case AMDGPU::G_DYN_STACKALLOC: {
3852 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3854 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
3857 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
3862 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3863 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
3866 case AMDGPU::G_INSERT: {
3871 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
3872 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
3873 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
3874 OpdsMapping[3] =
nullptr;
3877 case AMDGPU::G_EXTRACT: {
3881 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
3882 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
3883 OpdsMapping[2] =
nullptr;
3886 case AMDGPU::G_BUILD_VECTOR:
3887 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
3888 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
3891 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3894 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
3896 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
3897 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
3898 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
3904 case AMDGPU::G_MERGE_VALUES:
3905 case AMDGPU::G_CONCAT_VECTORS: {
3907 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3908 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3910 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
3912 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
3913 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
3916 case AMDGPU::G_BITREVERSE:
3917 case AMDGPU::G_BITCAST:
3918 case AMDGPU::G_INTTOPTR:
3919 case AMDGPU::G_PTRTOINT:
3920 case AMDGPU::G_FABS:
3921 case AMDGPU::G_FNEG: {
3922 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3924 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
3927 case AMDGPU::G_AMDGPU_FFBH_U32:
3928 case AMDGPU::G_AMDGPU_FFBL_B32:
3929 case AMDGPU::G_CTLZ_ZERO_UNDEF:
3930 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
3931 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3933 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
3934 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
3937 case AMDGPU::G_CTPOP: {
3938 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3940 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
3945 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
3948 case AMDGPU::G_TRUNC: {
3954 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
3955 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
3958 case AMDGPU::G_ZEXT:
3959 case AMDGPU::G_SEXT:
3960 case AMDGPU::G_ANYEXT:
3961 case AMDGPU::G_SEXT_INREG: {
3970 switch (SrcBank->
getID()) {
3971 case AMDGPU::SGPRRegBankID:
3972 DstBank = AMDGPU::SGPRRegBankID;
3975 DstBank = AMDGPU::VGPRRegBankID;
3981 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
3982 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
3986 case AMDGPU::G_IS_FPCLASS: {
3988 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
3989 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3990 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
3991 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
3994 case AMDGPU::G_STORE: {
3996 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4001 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4002 OpdsMapping[0] = ValMapping;
4006 case AMDGPU::G_ICMP:
4007 case AMDGPU::G_FCMP: {
4008 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4013 AMDGPU::SGPRRegBankID);
4017 auto canUseSCCICMP = [&]() {
4020 return Size == 32 ||
4025 auto canUseSCCFCMP = [&]() {
4029 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4030 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4031 Op2Bank == AMDGPU::SGPRRegBankID &&
4032 Op3Bank == AMDGPU::SGPRRegBankID &&
4033 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4035 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4036 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4040 const unsigned ResultSize = 1;
4042 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4043 OpdsMapping[1] =
nullptr;
4044 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4045 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4048 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4051 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4052 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4053 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4055 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4057 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4058 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4061 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4064 case AMDGPU::G_INSERT_VECTOR_ELT: {
4066 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4068 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4069 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4070 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4074 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4075 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4079 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4080 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4083 assert(InsertSize == 32 || InsertSize == 64);
4084 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4088 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4091 case AMDGPU::G_UNMERGE_VALUES: {
4096 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4098 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4102 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4103 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4104 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4105 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4106 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4107 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4108 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4109 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4110 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4111 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4112 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4113 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4114 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4115 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4116 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4117 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4118 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4137 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4138 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4139 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4140 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4141 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4142 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4143 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4144 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4145 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4146 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4147 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4148 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4149 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4150 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4151 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4174 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4200 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD: {
4208 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4209 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4210 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4212 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4213 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4216 case AMDGPU::G_INTRINSIC:
4217 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4221 case Intrinsic::amdgcn_div_fmas:
4222 case Intrinsic::amdgcn_div_fixup:
4223 case Intrinsic::amdgcn_trig_preop:
4224 case Intrinsic::amdgcn_sin:
4225 case Intrinsic::amdgcn_cos:
4226 case Intrinsic::amdgcn_log_clamp:
4227 case Intrinsic::amdgcn_log:
4228 case Intrinsic::amdgcn_exp2:
4229 case Intrinsic::amdgcn_rcp:
4230 case Intrinsic::amdgcn_rcp_legacy:
4231 case Intrinsic::amdgcn_sqrt:
4232 case Intrinsic::amdgcn_rsq:
4233 case Intrinsic::amdgcn_rsq_legacy:
4234 case Intrinsic::amdgcn_rsq_clamp:
4235 case Intrinsic::amdgcn_fmul_legacy:
4236 case Intrinsic::amdgcn_fma_legacy:
4237 case Intrinsic::amdgcn_frexp_mant:
4238 case Intrinsic::amdgcn_frexp_exp:
4239 case Intrinsic::amdgcn_fract:
4240 case Intrinsic::amdgcn_cvt_pknorm_i16:
4241 case Intrinsic::amdgcn_cvt_pknorm_u16:
4242 case Intrinsic::amdgcn_cvt_pk_i16:
4243 case Intrinsic::amdgcn_cvt_pk_u16:
4244 case Intrinsic::amdgcn_fmed3:
4245 case Intrinsic::amdgcn_cubeid:
4246 case Intrinsic::amdgcn_cubema:
4247 case Intrinsic::amdgcn_cubesc:
4248 case Intrinsic::amdgcn_cubetc:
4249 case Intrinsic::amdgcn_sffbh:
4250 case Intrinsic::amdgcn_fmad_ftz:
4251 case Intrinsic::amdgcn_mbcnt_lo:
4252 case Intrinsic::amdgcn_mbcnt_hi:
4253 case Intrinsic::amdgcn_mul_u24:
4254 case Intrinsic::amdgcn_mul_i24:
4255 case Intrinsic::amdgcn_mulhi_u24:
4256 case Intrinsic::amdgcn_mulhi_i24:
4257 case Intrinsic::amdgcn_lerp:
4258 case Intrinsic::amdgcn_sad_u8:
4259 case Intrinsic::amdgcn_msad_u8:
4260 case Intrinsic::amdgcn_sad_hi_u8:
4261 case Intrinsic::amdgcn_sad_u16:
4262 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4263 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4264 case Intrinsic::amdgcn_mqsad_u32_u8:
4265 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4266 case Intrinsic::amdgcn_alignbyte:
4267 case Intrinsic::amdgcn_perm:
4268 case Intrinsic::amdgcn_fdot2:
4269 case Intrinsic::amdgcn_sdot2:
4270 case Intrinsic::amdgcn_udot2:
4271 case Intrinsic::amdgcn_sdot4:
4272 case Intrinsic::amdgcn_udot4:
4273 case Intrinsic::amdgcn_sdot8:
4274 case Intrinsic::amdgcn_udot8:
4275 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4276 case Intrinsic::amdgcn_fdot2_f16_f16:
4277 case Intrinsic::amdgcn_fdot2_f32_bf16:
4278 case Intrinsic::amdgcn_sudot4:
4279 case Intrinsic::amdgcn_sudot8:
4280 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4281 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4282 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4283 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4284 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4285 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4287 case Intrinsic::amdgcn_sbfe:
4288 case Intrinsic::amdgcn_ubfe:
4292 case Intrinsic::amdgcn_ds_swizzle:
4293 case Intrinsic::amdgcn_ds_permute:
4294 case Intrinsic::amdgcn_ds_bpermute:
4295 case Intrinsic::amdgcn_update_dpp:
4296 case Intrinsic::amdgcn_mov_dpp8:
4297 case Intrinsic::amdgcn_mov_dpp:
4298 case Intrinsic::amdgcn_strict_wwm:
4299 case Intrinsic::amdgcn_wwm:
4300 case Intrinsic::amdgcn_strict_wqm:
4301 case Intrinsic::amdgcn_wqm:
4302 case Intrinsic::amdgcn_softwqm:
4303 case Intrinsic::amdgcn_set_inactive:
4304 case Intrinsic::amdgcn_permlane64:
4306 case Intrinsic::amdgcn_cvt_pkrtz:
4310 case Intrinsic::amdgcn_kernarg_segment_ptr:
4311 case Intrinsic::amdgcn_s_getpc:
4312 case Intrinsic::amdgcn_groupstaticsize:
4313 case Intrinsic::amdgcn_reloc_constant:
4314 case Intrinsic::returnaddress: {
4315 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4316 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4319 case Intrinsic::amdgcn_wqm_vote: {
4320 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4321 OpdsMapping[0] = OpdsMapping[2]
4322 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4325 case Intrinsic::amdgcn_ps_live: {
4326 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4329 case Intrinsic::amdgcn_div_scale: {
4330 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4331 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4332 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4333 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4335 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4336 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4337 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4340 case Intrinsic::amdgcn_class: {
4341 Register Src0Reg =
MI.getOperand(2).getReg();
4342 Register Src1Reg =
MI.getOperand(3).getReg();
4343 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4344 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4345 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4346 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4347 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4348 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4351 case Intrinsic::amdgcn_icmp:
4352 case Intrinsic::amdgcn_fcmp: {
4353 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4355 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4356 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4357 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4358 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4361 case Intrinsic::amdgcn_readlane: {
4364 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4366 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4369 case Intrinsic::amdgcn_readfirstlane: {
4370 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4371 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4372 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4373 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4376 case Intrinsic::amdgcn_writelane: {
4377 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4379 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4382 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4384 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4388 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4389 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4390 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4393 case Intrinsic::amdgcn_if_break: {
4395 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4396 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4397 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4400 case Intrinsic::amdgcn_permlane16:
4401 case Intrinsic::amdgcn_permlanex16: {
4403 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4404 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4405 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4410 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4411 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4412 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4413 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4414 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4415 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4416 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4417 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4418 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4419 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4420 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4421 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4422 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4423 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4424 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4425 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4426 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4427 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4428 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4429 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4430 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4431 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4432 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4433 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4434 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4435 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4436 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4437 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4438 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4439 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4440 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4441 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4442 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4443 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4444 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4445 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4446 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4447 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4448 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
4457 Info->mayNeedAGPRs()
4463 Info->mayNeedAGPRs()
4468 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4469 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4470 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4471 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4472 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4473 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4474 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4475 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4476 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4477 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4478 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4479 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4480 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4481 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
4490 case Intrinsic::amdgcn_interp_p1:
4491 case Intrinsic::amdgcn_interp_p2:
4492 case Intrinsic::amdgcn_interp_mov:
4493 case Intrinsic::amdgcn_interp_p1_f16:
4494 case Intrinsic::amdgcn_interp_p2_f16:
4495 case Intrinsic::amdgcn_lds_param_load: {
4496 const int M0Idx =
MI.getNumOperands() - 1;
4497 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4499 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4501 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4502 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4503 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4507 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4510 case Intrinsic::amdgcn_interp_inreg_p10:
4511 case Intrinsic::amdgcn_interp_inreg_p2:
4512 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4513 case Intrinsic::amdgcn_interp_inreg_p2_f16: {
4514 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4515 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4516 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4517 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4518 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4521 case Intrinsic::amdgcn_ballot: {
4522 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4523 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4524 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4525 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4528 case Intrinsic::amdgcn_inverse_ballot: {
4530 Register MaskReg =
MI.getOperand(2).getReg();
4531 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4532 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4533 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4534 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4537 case Intrinsic::amdgcn_wave_reduce_umin:
4538 case Intrinsic::amdgcn_wave_reduce_umax: {
4539 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4540 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4541 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4544 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
4550 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4551 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4552 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4553 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4556 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
4563 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
4564 unsigned N =
MI.getNumExplicitOperands() - 2;
4565 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
4569 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4572 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4575 for (
unsigned I = 2;
I <
N; ++
I) {
4576 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
4577 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4582 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
4583 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
4584 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
4586 case Intrinsic::amdgcn_s_getreg:
4587 case Intrinsic::amdgcn_s_memtime:
4588 case Intrinsic::amdgcn_s_memrealtime:
4589 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
4590 case Intrinsic::amdgcn_s_sendmsg_rtn: {
4591 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4592 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4595 case Intrinsic::amdgcn_global_atomic_fadd:
4596 case Intrinsic::amdgcn_global_atomic_csub:
4597 case Intrinsic::amdgcn_global_atomic_fmin:
4598 case Intrinsic::amdgcn_global_atomic_fmax:
4599 case Intrinsic::amdgcn_flat_atomic_fadd:
4600 case Intrinsic::amdgcn_flat_atomic_fmin:
4601 case Intrinsic::amdgcn_flat_atomic_fmax:
4602 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
4603 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4605 case Intrinsic::amdgcn_ds_ordered_add:
4606 case Intrinsic::amdgcn_ds_ordered_swap:
4607 case Intrinsic::amdgcn_ds_fadd_v2bf16: {
4608 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4609 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4611 AMDGPU::SGPRRegBankID);
4612 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
4613 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4616 case Intrinsic::amdgcn_ds_append:
4617 case Intrinsic::amdgcn_ds_consume: {
4618 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4619 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4623 case Intrinsic::amdgcn_exp_compr:
4624 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4625 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4627 case Intrinsic::amdgcn_exp:
4629 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4630 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4631 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4632 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4634 case Intrinsic::amdgcn_exp_row:
4635 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4636 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4637 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4638 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4641 case Intrinsic::amdgcn_s_sendmsg:
4642 case Intrinsic::amdgcn_s_sendmsghalt: {
4645 AMDGPU::SGPRRegBankID);
4646 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4649 case Intrinsic::amdgcn_s_setreg: {
4652 AMDGPU::SGPRRegBankID);
4653 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4656 case Intrinsic::amdgcn_end_cf: {
4658 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4661 case Intrinsic::amdgcn_else: {
4663 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4664 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4665 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4668 case Intrinsic::amdgcn_live_mask: {
4669 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4672 case Intrinsic::amdgcn_wqm_demote:
4673 case Intrinsic::amdgcn_kill: {
4674 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4677 case Intrinsic::amdgcn_raw_buffer_load:
4678 case Intrinsic::amdgcn_raw_ptr_buffer_load:
4679 case Intrinsic::amdgcn_raw_tbuffer_load:
4680 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
4689 case Intrinsic::amdgcn_raw_buffer_load_lds:
4690 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
4697 case Intrinsic::amdgcn_raw_buffer_store:
4698 case Intrinsic::amdgcn_raw_ptr_buffer_store:
4699 case Intrinsic::amdgcn_raw_buffer_store_format:
4700 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
4701 case Intrinsic::amdgcn_raw_tbuffer_store:
4702 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
4709 case Intrinsic::amdgcn_struct_buffer_load:
4710 case Intrinsic::amdgcn_struct_ptr_buffer_load:
4711 case Intrinsic::amdgcn_struct_tbuffer_load:
4712 case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
4720 case Intrinsic::amdgcn_struct_buffer_load_lds:
4721 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
4729 case Intrinsic::amdgcn_struct_buffer_store:
4730 case Intrinsic::amdgcn_struct_ptr_buffer_store:
4731 case Intrinsic::amdgcn_struct_tbuffer_store:
4732 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
4740 case Intrinsic::amdgcn_init_exec_from_input: {
4742 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4745 case Intrinsic::amdgcn_ds_gws_init:
4746 case Intrinsic::amdgcn_ds_gws_barrier:
4747 case Intrinsic::amdgcn_ds_gws_sema_br: {
4748 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4752 AMDGPU::SGPRRegBankID);
4753 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4756 case Intrinsic::amdgcn_ds_gws_sema_v:
4757 case Intrinsic::amdgcn_ds_gws_sema_p:
4758 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
4761 AMDGPU::SGPRRegBankID);
4762 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4765 case Intrinsic::amdgcn_global_load_lds: {
4770 case Intrinsic::amdgcn_lds_direct_load: {
4771 const int M0Idx =
MI.getNumOperands() - 1;
4772 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4774 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4776 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);