#include "llvm/IR/IntrinsicsAMDGPU.h"

#define GET_TARGET_REGBANK_IMPL
#include "AMDGPUGenRegisterBank.inc"

#include "AMDGPUGenRegisterBankInfo.def"

using namespace MIPatternMatch;
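// ApplyRegBankMapping is a GISelChangeObserver: while a mapping is being
// applied, every register created or modified without an assigned bank is
// placed in NewBank, with special handling for boolean (s1) artifacts below.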
      : B(B), RBI(RBI_), MRI(MRI_), NewBank(RB) {
    assert(!B.isObservingChanges());
    B.setChangeObserver(*this);
  }

  ~ApplyRegBankMapping() override {
    for (MachineInstr *MI : NewInsts)
      applyBank(*MI);

    B.stopObservingChanges();
  }
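  // applyBank: fix up one observed instruction. Extends of a VCC (s1) source
  // cannot keep their form, since VCC is a per-lane condition mask, so they
  // are rewritten as a select between constants.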
    const unsigned Opc = MI.getOpcode();
    if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
        Opc == AMDGPU::G_SEXT) {
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
      if (SrcBank == &AMDGPU::VCCRegBank) {
        const LLT S32 = LLT::scalar(32);
        assert(NewBank == &AMDGPU::VGPRRegBank);

        // Replace the extension with a select, which really uses the boolean
        // source.
        B.setInsertPt(*MI.getParent(), MI);

        auto True = B.buildConstant(S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
        auto False = B.buildConstant(S32, 0);
        B.buildSelect(DstReg, SrcReg, True, False);
        MRI.setRegBank(True.getReg(0), *NewBank);
        MRI.setRegBank(False.getReg(0), *NewBank);
        MI.eraseFromParent();
        return;
      }

      assert(!MRI.getRegClassOrRegBank(DstReg));
      MRI.setRegBank(DstReg, *NewBank);
      return;
    }
    if (Opc == AMDGPU::G_TRUNC) {
      // ...
      assert(DstBank != &AMDGPU::VCCRegBank);
    }

    // Assume the newly created registers are always in the new bank.
    for (MachineOperand &Op : MI.operands()) {
      // ...
      if (Reg.isPhysical() || MRI.getRegClassOrRegBank(Reg))
        continue;

      const RegisterBank *RB = NewBank;
      if (MRI.getType(Reg) == LLT::scalar(1)) {
        assert(NewBank == &AMDGPU::VGPRRegBank &&
               "s1 operands should only be used for vector bools");
        assert((MI.getOpcode() != AMDGPU::G_TRUNC &&
                MI.getOpcode() != AMDGPU::G_ANYEXT) &&
               "not expecting legalization artifacts here");
        RB = &AMDGPU::VCCRegBank;
      }

      MRI.setRegBank(Reg, *RB);
    }
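// AMDGPURegisterBankInfo constructor: wires the TableGen'erated bank tables
// to the subtarget; the sanity asserts below run once per process.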
    : Subtarget(ST), TRI(Subtarget.getRegisterInfo()),
      TII(Subtarget.getInstrInfo()) {
  // ...
  static auto InitializeRegisterBankOnce = [this]() {
    assert(&getRegBank(AMDGPU::SGPRRegBankID) == &AMDGPU::SGPRRegBank &&
           &getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
           &getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
    // ...
  };
  llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
}
  unsigned BankID = Bank.getID();
  return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
}

  return RB != &AMDGPU::SGPRRegBank;
}

  if (Dst.getID() == AMDGPU::SGPRRegBankID &&
      // ...
    return std::numeric_limits<unsigned>::max();
  // ...
  if ((Dst.getID() == AMDGPU::SGPRRegBankID) &&
      ( // ...
       Src.getID() == AMDGPU::SGPRRegBankID ||
       Src.getID() == AMDGPU::VCCRegBankID))
    return std::numeric_limits<unsigned>::max();

  if (Dst.getID() == AMDGPU::AGPRRegBankID &&
      Src.getID() == AMDGPU::AGPRRegBankID)
    // ...
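// Map a register class to its natural bank: SReg_1 is the wave-wide boolean
// (VCC), AGPR classes go to the AGPR bank, and the rest is SGPR or VGPR.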
  if (&RC == &AMDGPU::SReg_1RegClass)
    return AMDGPU::VCCRegBank;
  // ...
    return AMDGPU::SGPRRegBank;
  // ...
  return Ty == LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
  // ...
  return TRI->isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
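// addMappingFromTable: expand a small cost table (one row per legal bank
// combination of the listed register operands) into InstructionMappings.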
template <unsigned NumOps>
RegisterBankInfo::InstructionMappings
AMDGPURegisterBankInfo::addMappingFromTable(
    const MachineInstr &MI, const MachineRegisterInfo &MRI,
    const std::array<unsigned, NumOps> RegSrcOpIdx,
    ArrayRef<OpRegBankEntry<NumOps>> Table) const {
  // ...
  unsigned Sizes[NumOps];
  for (unsigned I = 0; I < NumOps; ++I) {
    Register Reg = MI.getOperand(RegSrcOpIdx[I]).getReg();
    Sizes[I] = getSizeInBits(Reg, MRI, *TRI);
  }

  for (unsigned I = 0, E = MI.getNumExplicitDefs(); I != E; ++I) {
    unsigned SizeI = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI);
    Operands[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
  }

  // getInstrMapping's default mapping uses ID 1, so start at 2.
  unsigned MappingID = 2;
  for (const auto &Entry : Table) {
    for (unsigned I = 0; I < NumOps; ++I) {
      int OpIdx = RegSrcOpIdx[I];
      Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[I], Sizes[I]);
    }
    // ...
  }
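// Alternative mappings for intrinsics whose operands tolerate several bank
// combinations at different costs (higher cost means extra copies or
// readfirstlanes are required).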
  case Intrinsic::amdgcn_readlane: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Need a readfirstlane for the index.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_writelane: {
    static const OpRegBankEntry<4> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need readfirstlane of the first operand.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of the second operand.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },

      // Need readfirstlane of both operands.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
    return addMappingFromTable<4>(MI, MRI, RegSrcOpIdx, Table);
  }
  // ...
  case Intrinsic::amdgcn_s_buffer_load: {
    static const OpRegBankEntry<2> Table[4] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },

      // Only need 1 register in loop.
      { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },

      // Have to waterfall the resource.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },

      // Have to waterfall the resource, and the offset.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
    };

    const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
    return addMappingFromTable<2>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_ds_ordered_add:
  case Intrinsic::amdgcn_ds_ordered_swap: {
    static const OpRegBankEntry<3> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },

      // Need a readfirstlane for m0.
      { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
    };

    const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
    return addMappingFromTable<3>(MI, MRI, RegSrcOpIdx, Table);
  }
  case Intrinsic::amdgcn_s_sendmsg:
  case Intrinsic::amdgcn_s_sendmsghalt: {
    static const OpRegBankEntry<1> Table[2] = {
      // Perfectly legal.
      { { AMDGPU::SGPRRegBankID }, 1 },

      // Need readfirstlane of m0.
      { { AMDGPU::VGPRRegBankID }, 3 }
    };

    const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
    return addMappingFromTable<1>(MI, MRI, RegSrcOpIdx, Table);
  }
  if (!MI.hasOneMemOperand())
    return false;
  // ...
  switch (MI.getOpcode()) {
  case TargetOpcode::G_CONSTANT:
  case TargetOpcode::G_IMPLICIT_DEF: {
    // ...
    static const OpRegBankEntry<1> Table[3] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 },
      { { AMDGPU::VCCRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_FCONSTANT:
  case TargetOpcode::G_FRAME_INDEX:
  case TargetOpcode::G_GLOBAL_VALUE: {
    static const OpRegBankEntry<1> Table[2] = {
      { { AMDGPU::VGPRRegBankID }, 1 },
      { { AMDGPU::SGPRRegBankID }, 1 }
    };

    return addMappingFromTable<1>(MI, MRI, {{ 0 }}, Table);
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size)}),
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
           AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
           AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
    // ...
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    // ...
    LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
    // ...
  }
  case TargetOpcode::G_SELECT: {
    // ...
          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
          AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size)}),
    // ...
          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size),
          AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size)}),
    // ...
  }
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE: {
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size),
           AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
    // ...
          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
    // ...
  }
  case AMDGPU::G_BRCOND: {
    assert(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() == 1);
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1), nullptr}),
    // ...
        getOperandsMapping(
          {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1), nullptr}),
    // ...
  }
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT:
    // ...
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
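// Helpers for splitting a 64-bit value into two 32-bit halves that keep the
// original register bank assignment.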
  Register LoLHS = MRI->createGenericVirtualRegister(HalfTy);
  Register HiLHS = MRI->createGenericVirtualRegister(HalfTy);
  // ...
  MRI->setRegBank(LoLHS, *Bank);
  MRI->setRegBank(HiLHS, *Bank);
  // ...
  B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
  // ...
  MRI.setType(Reg, NewTy);
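// buildReadFirstLane: produce a uniform (SGPR) copy of a possibly-divergent
// value by v_readfirstlane'ing each 32-bit piece and re-merging the result.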
  LLT Ty = MRI.getType(Src);
  // ...
  if (Bank == &AMDGPU::SGPRRegBank)
    return Src;
  // ...
  if (Bank != &AMDGPU::VGPRRegBank) {
    // ...
    Src = B.buildCopy(Ty, Src).getReg(0);
    MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
  }

  unsigned NumParts = Bits / 32;
  // ...
  auto Unmerge = B.buildUnmerge(S32, Src);
  for (unsigned i = 0; i < NumParts; ++i)
    SrcParts.push_back(Unmerge.getReg(i));

  for (unsigned i = 0; i < NumParts; ++i) {
    Register SrcPart = SrcParts[i];
    Register DstPart = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
    MRI.setType(DstPart, NumParts == 1 ? Ty : S32);

    const TargetRegisterClass *Constrained =
        constrainGenericRegister(SrcPart, AMDGPU::VGPR_32RegClass, MRI);
    (void)Constrained;
    assert(Constrained && "Failed to constrain readfirstlane src reg");

    B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
    // ...
  }

  Register Dst = B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
  MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
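// executeInWaterfallLoop: legalize operands that must be uniform but live in
// VGPRs. The loop reads the first active lane's value, masks EXEC down to
// the lanes that match it, executes the instruction range, and repeats until
// every lane has been serviced.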
  const unsigned MovExecOpc =
      ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
  const unsigned MovExecTermOpc =
      ST.isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;

  const unsigned XorTermOpc = ST.isWave32() ?
    AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
  const unsigned AndSaveExecOpc = ST.isWave32() ?
    AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
  const Register ExecReg = ST.isWave32() ?
    AMDGPU::EXEC_LO : AMDGPU::EXEC;

  const int OrigRangeSize = std::distance(Range.begin(), Range.end());
  // ...
  Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
  Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);

  // Don't bother using generic instructions/registers for the exec mask.
  B.buildInstr(TargetOpcode::IMPLICIT_DEF)
    .addDef(InitSaveExecReg);

  Register PhiExec = MRI.createVirtualRegister(WaveRC);
  Register NewExec = MRI.createVirtualRegister(WaveRC);
  // ...
  B.setInsertPt(*LoopBB, LoopBB->end());

  B.buildInstr(TargetOpcode::PHI)
      .addDef(PhiExec)
      .addReg(InitSaveExecReg)
      // ...

  // ...
  auto NewEnd = BodyBB->end();
  // ...
  assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
  // ...
    Register OldReg = Op.getReg();
    if (!SGPROperandRegs.count(OldReg))
      continue;

    // See if we already processed this register in another instruction in
    // the sequence.
    auto OldVal = WaterfalledRegMap.find(OldReg);
    if (OldVal != WaterfalledRegMap.end()) {
      Op.setReg(OldVal->second);
      continue;
    }

    Register OpReg = Op.getReg();
    LLT OpTy = MRI.getType(OpReg);
    // ...
    if (OpBank != &AMDGPU::VGPRRegBank) {
      // Insert a copy to a VGPR so the loop can compare against it.
      OpReg = B.buildCopy(OpTy, OpReg).getReg(0);
      MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
    }
    // ...
    bool Is64 = OpSize % 64 == 0;
    unsigned PartSize = Is64 ? 64 : 32;
    // ...
    unsigned NumParts = OpSize / PartSize;
    // ...
    if (NumParts == 1) {
      OpParts.push_back(OpReg);
      CurrentLaneParts.push_back(CurrentLaneReg);
    } else {
      auto UnmergeOp = B.buildUnmerge(PartTy, OpReg);
      auto UnmergeCurrentLane = B.buildUnmerge(PartTy, CurrentLaneReg);
      for (unsigned i = 0; i < NumParts; ++i) {
        OpParts.push_back(UnmergeOp.getReg(i));
        CurrentLaneParts.push_back(UnmergeCurrentLane.getReg(i));
        MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
        MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
      }
    }

    for (unsigned i = 0; i < NumParts; ++i) {
      auto CmpReg = B.buildICmp(CmpInst::ICMP_EQ, S1, CurrentLaneParts[i],
                                OpParts[i]).getReg(0);
      MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);

      if (!CondReg) {
        CondReg = CmpReg;
      } else {
        CondReg = B.buildAnd(S1, CondReg, CmpReg).getReg(0);
        MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
      }
    }

    Op.setReg(CurrentLaneReg);

    // Make sure we don't re-process this register again.
    WaterfalledRegMap.insert(std::pair(OldReg, Op.getReg()));
  }
  // ...
  CondReg = B.buildIntrinsic(Intrinsic::amdgcn_ballot,
                             /* ... */).getReg(0);
  // ...
  MRI.setRegClass(CondReg, WaveRC);

  B.buildInstr(AndSaveExecOpc)
    .addDef(NewExec)
    .addReg(CondReg, RegState::Kill);

  MRI.setSimpleHint(NewExec, CondReg);

  B.setInsertPt(*BodyBB, BodyBB->end());

  // Update EXEC, switch all done bits to 0 and all todo bits to 1.
  B.buildInstr(XorTermOpc)
    .addDef(ExecReg)
    .addReg(ExecReg)
    .addReg(NewExec);
  // ...
  B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
  // ...
  B.setMBB(*RestoreExecBB);
  B.buildInstr(MovExecTermOpc)
    .addDef(ExecReg)
    .addReg(SaveExecReg);
  // ...
  B.setInsertPt(*RemainderBB, RemainderBB->begin());
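// collectWaterfallOperands gathers the operands that need the waterfall
// treatment (those not already known uniform); constrainOpWithReadfirstlane
// is the cheaper fallback for values known to be wave-uniform.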
  for (unsigned Op : OpIndices) {
    // ...
    if (OpBank->getID() != AMDGPU::SGPRRegBankID)
      SGPROperandRegs.insert(Reg);
  }

  // No operands need to be replaced, so no need to loop.
  return !SGPROperandRegs.empty();
}

// ...
  Register Reg = MI.getOperand(OpIdx).getReg();
  // ...
  if (Bank == &AMDGPU::SGPRRegBank)
    return;

  Reg = buildReadFirstLane(B, MRI, Reg);
  MI.getOperand(OpIdx).setReg(Reg);
  assert(FirstSize % EltSize == 0);

  unsigned FirstPartNumElts = FirstSize / EltSize;
  unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
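// applyMappingLoad: scalar (SMRD) loads have tighter legality rules than
// VALU loads. Sub-dword uniform loads are widened to 32 bits, oversized
// ones are split; wide VGPR loads are split into 128-bit pieces.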
  const LLT LoadTy = MRI.getType(DstReg);
  // ...
  const unsigned MaxNonSmrdLoadSize = 128;
  // ...
  if (DstBank == &AMDGPU::SGPRRegBank) {
    // ...
    if (LoadSize == 32 &&
        /* ... */)
      // ...
    if (LoadSize == 32 &&
        /* ... */)
      // ...

    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);

    if (LoadSize == 32) {
      // This is an extending load from a sub-dword size. Widen the memory
      // access size to 4 bytes and clear the extra high bits appropriately.
      if (MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
        // Must extend the sign bit into higher bits for a G_SEXTLOAD.
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildSExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else if (MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
        // Must extend with 0s into higher bits for a G_ZEXTLOAD.
        auto WideLoad = B.buildLoadFromOffset(S32, PtrReg, *MMO, 0);
        B.buildZExtInReg(MI.getOperand(0), WideLoad, MemSize);
      } else
        // We do not need to touch the higher bits for regular loads.
        B.buildLoadFromOffset(MI.getOperand(0), PtrReg, *MMO, 0);
    } else {
      // ...
      auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
      if (WiderTy.isScalar())
        B.buildTrunc(MI.getOperand(0), WideLoad);
      else
        B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
                                            WideLoad);
    }

    MI.eraseFromParent();
    return true;
  }

  // 128 is the maximum non-SMRD load size; anything larger must be split.
  if (LoadSize <= MaxNonSmrdLoadSize)
    return false;
  // ...
  if (SrcRegs.empty())
    SrcRegs.push_back(MI.getOperand(1).getReg());
  // ...
  LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
  MRI.setType(BasePtrReg, PtrTy);
  // ...
  assert(LoadSize % MaxNonSmrdLoadSize == 0);
  unsigned NumSplitParts = LoadTy.getSizeInBits() / MaxNonSmrdLoadSize;
  const LLT LoadSplitTy = LoadTy.divide(NumSplitParts);
  ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
  // ...
  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
  return true;
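// applyMappingDynStackAlloc: the scratch pointer is swizzled per lane, so
// the requested byte size is scaled by the wave size before bumping SP, and
// the size must be uniform.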
  const auto &TFI = *ST.getFrameLowering();
  // ...
  Register AllocSize = MI.getOperand(1).getReg();
  // ...
  // Guard in case the stack growth direction ever changes with scratch
  // instructions.
  if (SizeBank != &AMDGPU::SGPRRegBank)
    return false;
  // ...
  LLT PtrTy = MRI.getType(Dst);
  // ...
  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  auto WaveSize = B.buildConstant(LLT::scalar(32), ST.getWavefrontSizeLog2());
  auto ScaledSize = B.buildShl(IntPtrTy, AllocSize, WaveSize);
  // ...
  auto SPCopy = B.buildCopy(PtrTy, SPReg);
  if (Alignment > TFI.getStackAlign()) {
    auto PtrAdd = B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
    B.buildMaskLowPtrBits(Dst, PtrAdd,
                          Log2(Alignment) + ST.getWavefrontSizeLog2());
  } else {
    B.buildPtrAdd(Dst, SPCopy, ScaledSize);
  }

  MI.eraseFromParent();
  return true;
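// Image intrinsics: only the rsrc/sampler operands must be uniform; the
// operand index is counted past the explicit defs and the intrinsic ID.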
                                              int RsrcIdx) const {
  const int NumDefs = MI.getNumExplicitDefs();
  // ...
  RsrcIdx += NumDefs + 1;
  // ...
  for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;
    // ...
    if (I == RsrcIdx || I == RsrcIdx + 1)
      // ...
  }
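// setBufferOffsets: split a combined buffer offset into the VGPR voffset,
// SGPR soffset, and immediate fields, returning the constant offset that
// could be folded.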
                                                Register &SOffsetReg,
                                                int64_t &InstOffsetVal,
                                                Align Alignment) const {
  // ...
  if (std::optional<int64_t> Imm =
      // ...
    VOffsetReg = B.buildConstant(S32, 0).getReg(0);
    SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
    InstOffsetVal = ImmOffset;
    // ...
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
    B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
    return SOffset + ImmOffset;
  }
  // ...
    SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
    B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
    InstOffsetVal = ImmOffset;
    // ...
    VOffsetReg = B.buildConstant(S32, 0).getReg(0);
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
    InstOffsetVal = ImmOffset;
  // ...
  if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
    // ...
  }

  if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
    // ...
  }
  // ...
    VOffsetReg = CombinedOffset;
  // ...
    VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
    B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
  // ...
  SOffsetReg = B.buildConstant(S32, 0).getReg(0);
  B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
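// applyMappingSBufferLoad: an SGPR rsrc + SGPR offset s_buffer_load is legal
// as-is. Anything else is lowered to buffer_load(s): 256/512-bit results are
// split into 128-bit parts, and a divergent rsrc goes to the waterfall loop.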
  LLT Ty = MRI.getType(Dst);
  // ...
  if (RSrcBank == &AMDGPU::SGPRRegBank &&
      OffsetBank == &AMDGPU::SGPRRegBank)
    return true; // Legal mapping

  // ...
  int NumLoads = 1;
  if (LoadSize == 256 || LoadSize == 512) {
    NumLoads = LoadSize / 128;
    Ty = Ty.divide(NumLoads);
  }

  // Use the alignment to ensure that the required offsets will fit into the
  // immediate offsets.
  const Align Alignment = NumLoads > 1 ? Align(16 * NumLoads) : Align(1);
  // ...
  int64_t ImmOffset = 0;
  // ...
      SOffset, ImmOffset, Alignment);
  // ...
  const Align MemAlign(4);
  // ...
  B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
  // ...
  for (int i = 0; i < NumLoads; ++i) {
    if (NumLoads == 1) {
      // ...
    } else {
      LoadParts[i] = MRI.createGenericVirtualRegister(Ty);
      MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
    }
    // ...
    B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
        .addDef(LoadParts[i])
        // ...
        .addImm(ImmOffset + 16 * i)
        // ...
        .addMemOperand(MMO);
  }
  // ...
  if (RSrcBank != &AMDGPU::SGPRRegBank) {
    // ...
    B.setInstr(*Span.begin());
    MI.eraseFromParent();
    // ...
    OpsToWaterfall.insert(RSrc);
    // ...
  }

  if (NumLoads != 1) {
    if (Ty.isVector())
      B.buildConcatVectors(Dst, LoadParts);
    else
      B.buildMergeLikeInstr(Dst, LoadParts);
  }
  // ...
  if (RSrcBank == &AMDGPU::SGPRRegBank)
    MI.eraseFromParent();
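// applyMappingBFE: bitfield extracts. The VALU has no 64-bit BFE, so wide
// extracts are expanded with shifts; the scalar form packs the offset and
// width into a single operand.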
  LLT Ty = MRI.getType(DstReg);
  // ...
  unsigned FirstOpnd = isa<GIntrinsic>(MI) ? 2 : 1;
  Register SrcReg = MI.getOperand(FirstOpnd).getReg();
  Register OffsetReg = MI.getOperand(FirstOpnd + 1).getReg();
  Register WidthReg = MI.getOperand(FirstOpnd + 2).getReg();
  // ...
  if (DstBank == &AMDGPU::VGPRRegBank) {
    // ...
    // There is no 64-bit VGPR bitfield extract instruction, so the operation
    // is expanded to a sequence of instructions that implement it.
    ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

    // ...
    auto ShiftOffset = Signed ? B.buildAShr(S64, SrcReg, OffsetReg)
                              : B.buildLShr(S64, SrcReg, OffsetReg);
    auto UnmergeSOffset = B.buildUnmerge({S32, S32}, ShiftOffset);
    // ...
      auto Zero = B.buildConstant(S32, 0);
      auto WidthImm = ConstWidth->Value.getZExtValue();
      if (WidthImm <= 32) {
        // ...
        auto Extract =
            Signed ? B.buildSbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
                   : B.buildUbfx(S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
        auto Extend =
            Signed ? B.buildAShr(S32, Extract, B.buildConstant(S32, 31)) : Zero;
        B.buildMergeLikeInstr(DstReg, {Extract, Extend});
      } else {
        // ...
        auto UpperWidth = B.buildConstant(S32, WidthImm - 32);
        auto Extract =
            Signed
                ? B.buildSbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
                : B.buildUbfx(S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
        B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
      }
      MI.eraseFromParent();
      return true;
    }

    // Expand to Src >> Offset << (64 - Width) >> (64 - Width) using 64-bit
    // operations.
    auto ExtShift = B.buildSub(S32, B.buildConstant(S32, 64), WidthReg);
    auto SignBit = B.buildShl(S64, ShiftOffset, ExtShift);
    if (Signed)
      B.buildAShr(S64, SignBit, ExtShift);
    else
      B.buildLShr(S64, SignBit, ExtShift);
    MI.eraseFromParent();
    return true;
  }

  // The scalar form packs the offset and width in a single operand.
  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::SGPRRegBank);

  // Ensure the high bits are clear to insert the offset.
  auto OffsetMask = B.buildConstant(S32, maskTrailingOnes<unsigned>(6));
  auto ClampOffset = B.buildAnd(S32, OffsetReg, OffsetMask);

  // Zeros out the low bits, so don't bother clamping the input value.
  auto ShiftWidth = B.buildShl(S32, WidthReg, B.buildConstant(S32, 16));

  // Pack the offset and width into the format expected by S_BFE_{I|U}32:
  // bits [5:0] hold the offset and bits [22:16] the width.
  auto MergedInputs = B.buildOr(S32, ClampOffset, ShiftWidth);

  unsigned Opc = Ty == S32 ? (Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
                             (Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);

  auto MIB = B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
  // ...
  MI.eraseFromParent();
  return true;
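// applyMappingMAD_64_32: when the multiply inputs are scalar, split the MAD
// into a 32-bit mul/mulh plus a 64-bit add chain, so the multiplication can
// stay on the SALU while only the accumulate moves to the VALU.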
  // Keep the multiplication on the SALU.
  if (MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
    return false;

  bool IsUnsigned = MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
  // ...
  bool DstOnValu = MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
  bool Accumulate = true;
  // ...
  Register DstLo = B.buildMul(S32, Src0, Src1).getReg(0);
  bool MulHiInVgpr = false;

  MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);

  if (Subtarget.hasSMulHi()) {
    DstHi = IsUnsigned ? B.buildUMulH(S32, Src0, Src1).getReg(0)
                       : B.buildSMulH(S32, Src0, Src1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
  } else {
    // ...
    MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
    MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);

    DstHi = IsUnsigned ? B.buildUMulH(S32, VSrc0, VSrc1).getReg(0)
                       : B.buildSMulH(S32, VSrc0, VSrc1).getReg(0);
    MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
    // ...
  }
  // ...
  LLT CarryType = DstOnValu ? S1 : S32;
  const RegisterBank &CarryBank =
      DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
  const RegisterBank &DstBank =
      DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
  // ...
    Zero = B.buildConstant(S32, 0).getReg(0);
    MRI.setRegBank(Zero,
                   MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
    // ...
    MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
                                      : AMDGPU::SGPRRegBank);

    if (DstOnValu && !MulHiInVgpr) {
      Carry = B.buildTrunc(S1, Carry).getReg(0);
      MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
    }
  // ...
  if (Accumulate) {
    if (DstOnValu) {
      DstLo = B.buildCopy(S32, DstLo).getReg(0);
      DstHi = B.buildCopy(S32, DstHi).getReg(0);
      MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
      MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
    }

    auto Unmerge = B.buildUnmerge(S32, Src2);
    Register Src2Lo = Unmerge.getReg(0);
    Register Src2Hi = Unmerge.getReg(1);
    MRI.setRegBank(Src2Lo, DstBank);
    MRI.setRegBank(Src2Hi, DstBank);

    if (!IsUnsigned) {
      // ...
      MRI.setRegBank(Src2Sign.getReg(0), CarryBank);

      Carry = B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
      MRI.setRegBank(Carry, CarryBank);
    }

    auto AddLo = B.buildUAddo(S32, CarryType, DstLo, Src2Lo);
    DstLo = AddLo.getReg(0);
    Register CarryLo = AddLo.getReg(1);
    MRI.setRegBank(DstLo, DstBank);
    MRI.setRegBank(CarryLo, CarryBank);

    auto AddHi = B.buildUAdde(S32, CarryType, DstHi, Src2Hi, CarryLo);
    DstHi = AddHi.getReg(0);
    MRI.setRegBank(DstHi, DstBank);

    Register CarryHi = AddHi.getReg(1);
    MRI.setRegBank(CarryHi, CarryBank);

    if (IsUnsigned) {
      // ...
    } else {
      Carry = B.buildXor(CarryType, Carry, CarryHi).getReg(0);
      MRI.setRegBank(Carry, CarryBank);
    }
  } else {
    // ...
    Carry = B.buildConstant(CarryType, 0).getReg(0);
    MRI.setRegBank(Carry, CarryBank);
  }
  // ...
  B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});

  if (DstOnValu) {
    B.buildCopy(Dst1, Carry);
  } else {
    B.buildTrunc(Dst1, Carry);
  }

  MI.eraseFromParent();
  return true;
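// Pick the extension that preserves the semantics of a 16-bit operation when
// it is widened to 32 bits.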
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    return TargetOpcode::G_SEXT;
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    return TargetOpcode::G_ZEXT;
  default:
    return TargetOpcode::G_ANYEXT;
  }
static std::pair<Register, Register>
unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) {
  auto Bitcast = B.buildBitcast(S32, Src);

  if (ExtOpcode == TargetOpcode::G_SEXT) {
    auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16);
    auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16));
  if (ExtOpcode == TargetOpcode::G_ZEXT) {
    auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff));
    return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
  }

  assert(ExtOpcode == TargetOpcode::G_ANYEXT);
  return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
}
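// The following fragment repacks 16-bit vector store payloads into the wide
// register layout the hardware expects (D16 data handling).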
  if (!SrcReg.empty()) {
    // ...
  }
  // ...
  LLT StoreVT = MRI.getType(Reg);
  // ...
  auto Unmerge = B.buildUnmerge(S16, Reg);
  // ...
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    // ...
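// Buffer offset splitting: peel a constant addend off the offset, clamp it
// into the instruction's immediate field, and materialize any overflow back
// into the base register.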
static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
  int64_t Const;
  if (mi_match(Reg, MRI, m_ICst(Const)))
    return std::pair(Register(), Const);

  Register Base;
  if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
    return std::pair(Base, Const);

  // TODO: Handle G_OR used for add case.
  return std::pair(Reg, 0);
}

std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
                                           Register OrigOffset) const {
  // ...
  if (ImmOffset != 0) {
    // ...
    unsigned Overflow = ImmOffset & ~MaxImm;
    ImmOffset -= Overflow;
    if ((int32_t)Overflow < 0) {
      Overflow += ImmOffset;
      ImmOffset = 0;
    }
  }

  if (Overflow != 0) {
    if (!BaseReg) {
      BaseReg = B.buildConstant(S32, Overflow).getReg(0);
    } else {
      auto OverflowVal = B.buildConstant(S32, Overflow);
      BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
    }
  }

  if (!BaseReg)
    BaseReg = B.buildConstant(S32, 0).getReg(0);

  return {BaseReg, C1};
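// buildVCopy: emit explicit v_mov_b32s instead of a COPY so the EXEC
// dependency of an SGPR-to-VGPR copy inside the waterfall loop is visible.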
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.getSizeInBits() == 32) {
    B.buildInstr(AMDGPU::V_MOV_B32_e32)
      .addDef(DstReg)
      .addUse(SrcReg);
    // ...
  }

  Register TmpReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
  Register TmpReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);

  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addDef(TmpReg0)
    .addUse(SrcReg, 0, AMDGPU::sub0);
  B.buildInstr(AMDGPU::V_MOV_B32_e32)
    .addDef(TmpReg1)
    .addUse(SrcReg, 0, AMDGPU::sub1);
  B.buildInstr(AMDGPU::REG_SEQUENCE)
    .addDef(DstReg)
    .addUse(TmpReg0)
    .addImm(AMDGPU::sub0)
    .addUse(TmpReg1)
    .addImm(AMDGPU::sub1);
                                   unsigned ConstOffset) {
  // ...
  auto MaterializedOffset = B.buildConstant(S32, ConstOffset);

  auto Add = B.buildAdd(S32, WaterfallIdx, MaterializedOffset);
  MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
  MRI.setRegBank(Add.getReg(0), AMDGPU::SGPRRegBank);
  // ...
                                  bool IsBooleanSrc = false) {
  if (ExtOpc == AMDGPU::G_ZEXT) {
    B.buildConstant(Hi32Reg, 0);
  } else if (ExtOpc == AMDGPU::G_SEXT) {
    if (IsBooleanSrc) {
      // If we know the original source was an s1, the high half is the same
      // as the low half.
      B.buildCopy(Hi32Reg, Lo32Reg);
    } else {
      // Replicate the sign bit from the 32-bit extended part.
      auto ShiftAmt = B.buildConstant(S32, 31);
      B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
      B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
    }
  } else {
    assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
    B.buildUndef(Hi32Reg);
  }
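// For small vectors indexed by a divergent value, extract/insert element can
// be lowered to a compare-and-select chain instead of a full waterfall loop.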
bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
    MachineIRBuilder &B, MachineInstr &MI,
    const OperandsMapper &OpdMapper) const {
  // ...
  const RegisterBank &IdxBank =
    *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);
  // ...
  const RegisterBank &DstBank =
    *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  const RegisterBank &SrcBank =
    *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;

  const RegisterBank &CCBank =
    (DstBank == AMDGPU::SGPRRegBank &&
     SrcBank == AMDGPU::SGPRRegBank &&
     IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                     : AMDGPU::VCCRegBank;
  // ...
  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }
  // ...
  unsigned NumLanes = DstRegs.size();
  // ...
    EltTy = MRI.getType(DstRegs[0]);

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
  SmallVector<Register, 2> Res(NumLanes);
  for (unsigned L = 0; L < NumLanes; ++L)
    Res[L] = UnmergeToEltTy.getReg(L);

  for (unsigned I = 1; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
    auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {
      auto S = B.buildSelect(EltTy, Cmp,
                             UnmergeToEltTy.getReg(I * NumLanes + L), Res[L]);

      for (unsigned N : { 0, 2, 3 })
        MRI.setRegBank(S->getOperand(N).getReg(), DstBank);

      Res[L] = S->getOperand(0).getReg();
    }
  }

  for (unsigned L = 0; L < NumLanes; ++L) {
    Register DstReg = (NumLanes == 1) ? MI.getOperand(0).getReg() : DstRegs[L];
    B.buildCopy(DstReg, Res[L]);
    MRI.setRegBank(DstReg, DstBank);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();
  return true;
  if (CurrBank && *CurrBank != Bank) {
    Register Copy = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
    MRI.setRegBank(Copy, Bank);
    return Copy;
  }

  MRI.setRegBank(Reg, Bank);
  return Reg;
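// Same compare-and-select lowering for G_INSERT_VECTOR_ELT; here operand 3
// is the index and operand 2 the value being inserted.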
bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
    MachineIRBuilder &B, MachineInstr &MI,
    const OperandsMapper &OpdMapper) const {
  // ...
  const RegisterBank &IdxBank =
    *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;

  bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;

  LLT VecTy = MRI.getType(VecReg);
  // ...
  const RegisterBank &DstBank =
    *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
  const RegisterBank &SrcBank =
    *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
  const RegisterBank &InsBank =
    *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;

  const RegisterBank &CCBank =
    (DstBank == AMDGPU::SGPRRegBank &&
     SrcBank == AMDGPU::SGPRRegBank &&
     InsBank == AMDGPU::SGPRRegBank &&
     IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
                                     : AMDGPU::VCCRegBank;
  // ...
  if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
    Idx = B.buildCopy(S32, Idx)->getOperand(0).getReg();
    MRI.setRegBank(Idx, AMDGPU::VGPRRegBank);
  }
  // ...
  unsigned NumLanes = InsRegs.size();
  if (!NumLanes) {
    NumLanes = 1;
    InsRegs.push_back(MI.getOperand(2).getReg());
  } else {
    EltTy = MRI.getType(InsRegs[0]);
  }

  auto UnmergeToEltTy = B.buildUnmerge(EltTy, VecReg);
  SmallVector<Register, 16> Ops(NumElem * NumLanes);

  for (unsigned I = 0; I < NumElem; ++I) {
    auto IC = B.buildConstant(S32, I);
    MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
    auto Cmp = B.buildICmp(CmpInst::ICMP_EQ, CCTy, Idx, IC);
    MRI.setRegBank(Cmp->getOperand(0).getReg(), CCBank);

    for (unsigned L = 0; L < NumLanes; ++L) {
      // ...
      Register Op1 = UnmergeToEltTy.getReg(I * NumLanes + L);
      // ...
    }
  }
  // ...
  if (MergeTy == MRI.getType(MI.getOperand(0).getReg())) {
    B.buildBuildVector(MI.getOperand(0), Ops);
  } else {
    auto Vec = B.buildBuildVector(MergeTy, Ops);
    MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
    B.buildBitcast(MI.getOperand(0).getReg(), Vec);
  }

  MRI.setRegBank(MI.getOperand(0).getReg(), DstBank);
  MI.eraseFromParent();
  return true;
  if (DefRegs.empty()) {
    // ...
  }
  // ...
  assert(Src0Regs.size() == Src1Regs.size() &&
         (Src0Regs.empty() || Src0Regs.size() == 2));
  // ...
  if (Src0Regs.empty())
    // ...
  if (Src1Regs.empty())
    // ...
  // ...
  // Lower a 64-bit multiply into 32-bit pieces:
  //   lo = mul(a.lo, b.lo)
  //   hi = umulh(a.lo, b.lo) + mul(a.lo, b.hi) + mul(a.hi, b.lo)
  ApplyRegBankMapping ApplyBank(B, *this, MRI, &AMDGPU::VGPRRegBank);

  Register Hi = B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
  Register MulLoHi = B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
  Register Add = B.buildAdd(HalfTy, Hi, MulLoHi).getReg(0);
  Register MulHiLo = B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
  B.buildAdd(DefRegs[1], Add, MulHiLo);
  B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);

  MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
  MI.eraseFromParent();
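// applyMappingImpl: the main post-mapping fixup switch. Each case either
// accepts the chosen mapping, rewrites the instruction for the assigned
// banks, or splits it into per-bank pieces.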
  B.setInstrAndDebugLoc(MI);
  unsigned Opc = MI.getOpcode();
  // ...
  switch (Opc) {
  case AMDGPU::G_CONSTANT:
  case AMDGPU::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (DstBank == &AMDGPU::VCCRegBank)
      break;
    // ...
    if (DefRegs.empty())
      DefRegs.push_back(DstReg);

    B.setInsertPt(*MI.getParent(), ++MI.getIterator());
    // ...
    LLVMContext &Ctx = B.getMF().getFunction().getContext();

    MI.getOperand(0).setReg(NewDstReg);
    if (Opc != AMDGPU::G_IMPLICIT_DEF) {
      uint64_t ConstVal = MI.getOperand(1).getCImm()->getZExtValue();
      MI.getOperand(1).setCImm(
          ConstantInt::get(IntegerType::get(Ctx, 32), ConstVal));
    }

    MRI.setRegBank(NewDstReg, *DstBank);
    B.buildTrunc(DefRegs[0], NewDstReg);
    return;
  }
  case AMDGPU::G_PHI: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (DstBank == &AMDGPU::VCCRegBank) {
      // ...
      for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
        // ...
        if (SrcBank != &AMDGPU::VCCRegBank) {
          // ...
          MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
          MI.getOperand(I).setReg(Copy.getReg(0));
        }
      }

      return;
    }
    // ...
    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
    B.setInsertPt(B.getMBB(), MI);
    // ...
    return;
  }
  case AMDGPU::G_FCMP:
    // ...
    [[fallthrough]];
  case AMDGPU::G_ICMP:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE: {
    unsigned BoolDstOp =
        (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
    Register DstReg = MI.getOperand(BoolDstOp).getReg();
    // ...
    if (DstBank != &AMDGPU::SGPRRegBank)
      break;

    const bool HasCarryIn = MI.getNumOperands() == 5;
    // ...
    MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
    MI.getOperand(BoolDstOp).setReg(NewDstReg);

    if (HasCarryIn) {
      // ...
      MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
      B.buildZExt(NewSrcReg, MI.getOperand(4).getReg());
      MI.getOperand(4).setReg(NewSrcReg);
    }
    // ...
    B.setInsertPt(*MBB, std::next(MI.getIterator()));
    // ...
    if (DefRegs.empty())
      DefRegs.push_back(DstReg);
    B.buildTrunc(DefRegs[0], NewDstReg);
    return;
  }
  case AMDGPU::G_SELECT: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (CondRegs.empty())
      CondRegs.push_back(MI.getOperand(1).getReg());
    // ...
    if (CondBank == &AMDGPU::SGPRRegBank) {
      // ...
      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(1).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondRegs[0]);
    }
    // ...
    if (DefRegs.empty()) {
      // ...
    }
    // ...
    if (Src1Regs.empty())
      // ...
    if (Src2Regs.empty())
      // ...
    // ...
    B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
    B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_BRCOND: {
    Register CondReg = MI.getOperand(0).getReg();
    // ...
    if (CondBank == &AMDGPU::SGPRRegBank) {
      // ...
      MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);

      MI.getOperand(0).setReg(NewCondReg);
      B.buildZExt(NewCondReg, CondReg);
    }
    // ...
    return;
  }
  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR: {
    // 64-bit and is only available on the SALU, so split into 2 32-bit ops
    // if there is a VGPR.
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (DstBank == &AMDGPU::VCCRegBank)
      break;
    // ...
    ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank);
    // ...
    if (DefRegs.empty()) {
      // ...
    }
    // ...
    assert(Src0Regs.size() == Src1Regs.size() &&
           (Src0Regs.empty() || Src0Regs.size() == 2));
    // ...
    if (Src0Regs.empty())
      // ...
    if (Src1Regs.empty())
      // ...
    // ...
    B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
    B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});

    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_ABS: {
    Register SrcReg = MI.getOperand(1).getReg();
    const RegisterBank *SrcBank = MRI.getRegBankOrNull(SrcReg);

    // There is no VALU abs instruction, so we need a sub and max combination.
    if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
      // ...
      ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank);
      // ...
      return;
    }
    [[fallthrough]];
  }
  // ...
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (DstBank == &AMDGPU::VGPRRegBank)
      break;
    // ...
    ApplyRegBankMapping ApplySALU(B, *this, MRI, &AMDGPU::SGPRRegBank);

    if (DstTy.isVector() && Opc == AMDGPU::G_ABS) {
      Register WideSrcLo, WideSrcHi;

      std::tie(WideSrcLo, WideSrcHi) =
          unpackV2S16ToS32(B, MI.getOperand(1).getReg(), TargetOpcode::G_SEXT);
      auto Lo = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcLo});
      auto Hi = B.buildInstr(AMDGPU::G_ABS, {S32}, {WideSrcHi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();
      return;
    }

    if (DstTy.isVector()) {
      Register WideSrc0Lo, WideSrc0Hi;
      Register WideSrc1Lo, WideSrc1Hi;
      // ...
      std::tie(WideSrc0Lo, WideSrc0Hi)
        = unpackV2S16ToS32(B, MI.getOperand(1).getReg(), ExtOpc);
      std::tie(WideSrc1Lo, WideSrc1Hi)
        = unpackV2S16ToS32(B, MI.getOperand(2).getReg(), ExtOpc);
      auto Lo = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
      auto Hi = B.buildInstr(MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
      B.buildBuildVectorTrunc(DstReg, {Lo.getReg(0), Hi.getReg(0)});
      MI.eraseFromParent();
      return;
    }
    // ...
    if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
        Opc == AMDGPU::G_ASHR) {
      B.setInsertPt(*MBB, MI.getIterator());
      // ...
    }
    // ...
    return;
  }
  case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
  case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
    // ...
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg0 = MI.getOperand(1).getReg();
    Register SrcReg1 = MI.getOperand(2).getReg();
    // ...
    assert(MRI.getType(DstReg) == S64 &&
           "This is a special case for s_mul_u64 "
           "that handles only 64-bit operands.");
    // ...
    // If all the operands are SGPRs, use the scalar s_mul_u64.
    if (DstBank == &AMDGPU::SGPRRegBank) {
      MI.setDesc(TII->get(AMDGPU::S_MUL_U64));
      MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
      MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
      return;
    }
    // ...
    assert(MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
           "The destination operand should be in vector registers.");
    // ...
    // Extract the lower subregister from the first operand.
    Register Op0L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
    MRI.setType(Op0L, S32);
    B.buildTrunc(Op0L, SrcReg0);

    // Extract the lower subregister from the second operand.
    Register Op1L = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
    MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
    MRI.setType(Op1L, S32);
    B.buildTrunc(Op1L, SrcReg1);

    unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
                          ? AMDGPU::G_AMDGPU_MAD_U64_U32
                          : AMDGPU::G_AMDGPU_MAD_I64_I32;
    // ...
    MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
    Register CarryOut = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
    MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
    B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_SEXT_INREG: {
    // ...
    if (SrcRegs.empty())
      break; // Nothing to repair
    // ...
    ApplyRegBankMapping O(B, *this, MRI, &AMDGPU::VGPRRegBank);
    // ...
    int Amt = MI.getOperand(2).getImm();
    if (Amt <= 32) {
      if (Amt == 32) {
        // The low bits are unchanged.
        B.buildFreeze(DstRegs[0], SrcRegs[0]);
      } else {
        auto Freeze = B.buildFreeze(S32, SrcRegs[0]);
        // Extend in the low bits and propagate the sign bit to the high half.
        B.buildSExtInReg(DstRegs[0], Freeze, Amt);
      }

      B.buildAShr(DstRegs[1], DstRegs[0], B.buildConstant(S32, 31));
    } else {
      // The low bits are unchanged; extend in the high bits.
      B.buildCopy(DstRegs[0], SrcRegs[0]);
      B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
    }

    Register DstReg = MI.getOperand(0).getReg();
    MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_CTPOP:
  case AMDGPU::G_BITREVERSE: {
    // ...
    if (DstBank == &AMDGPU::SGPRRegBank)
      break;
    // ...
    LLT Ty = MRI.getType(SrcReg);
    // ...
    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
    // ...
    return;
  }
  case AMDGPU::G_AMDGPU_FFBH_U32:
  case AMDGPU::G_AMDGPU_FFBL_B32:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ_ZERO_UNDEF: {
    // ...
    if (DstBank == &AMDGPU::SGPRRegBank)
      break;
    // ...
    LLT Ty = MRI.getType(SrcReg);
    // ...
    // Narrow the 64-bit count using 32-bit ffbh/ffbl, which return -1 for a
    // zero input, e.g. (ctlz_zero_undef hi:lo) ->
    //   (umin (ffbh hi), (add (ffbh lo), 32)).
    ApplyRegBankMapping ApplyVALU(B, *this, MRI, &AMDGPU::VGPRRegBank);
    // ...
    unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
                          ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
                          : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
                              ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32
                              : Opc;
    unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
    auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
    auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});
    unsigned AddOpc =
        Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
            ? AMDGPU::G_ADD
            : AMDGPU::G_UADDSAT;
    Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});
    Register DstReg = MI.getOperand(0).getReg();
    B.buildUMin(DstReg, X, Y);
    MI.eraseFromParent();
    return;
  }
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_ANYEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    const bool Signed = Opc == AMDGPU::G_SEXT;
    // ...
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isScalar() &&
        SrcBank != &AMDGPU::SGPRRegBank &&
        SrcBank != &AMDGPU::VCCRegBank &&
        // ...
      if (Signed) {
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
      } else if (Opc == AMDGPU::G_ZEXT) {
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
      } else {
        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
      }
      // ...
      MRI.setRegBank(DstReg, *SrcBank);
      MI.eraseFromParent();
      return;
    }
    // ...
    if (SrcBank == &AMDGPU::VCCRegBank) {
      // ...
      const bool UseSel64 = DstSize > 32 &&
        SrcBank->getID() == AMDGPU::SGPRRegBankID;
      // ...
      auto True = B.buildConstant(SelType, Signed ? -1 : 1);
      auto False = B.buildConstant(SelType, 0);

      MRI.setRegBank(True.getReg(0), *DstBank);
      MRI.setRegBank(False.getReg(0), *DstBank);
      MRI.setRegBank(DstReg, *DstBank);

      if (DstSize > 32) {
        B.buildSelect(DefRegs[0], SrcReg, True, False);
        // ...
      } else if (DstSize < 32) {
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
        MRI.setRegBank(Sel.getReg(0), *DstBank);
        B.buildTrunc(DstReg, Sel);
      } else {
        B.buildSelect(DstReg, SrcReg, True, False);
      }

      MI.eraseFromParent();
      return;
    }

    break;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    // ...
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    if (foldExtractEltToCmpSelect(B, MI, OpdMapper))
      return;
    // ...
    unsigned ConstOffset;
    std::tie(BaseIdxReg, ConstOffset) =
        AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(2).getReg());
    // ...
    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
                                   // ...

    // Move the base register. We'll re-insert the add later.
    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(2).setReg(BaseIdxReg);
    // ...
    const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
                                SrcBank == &AMDGPU::SGPRRegBank;
    if (DstRegs.empty()) {
      // ...
      if (NeedCopyToVGPR) {
        // We don't want a phi for this temporary register.
        Register TmpReg = MRI.createGenericVirtualRegister(DstTy);
        MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
        MI.getOperand(0).setReg(TmpReg);
        B.setInsertPt(*MI.getParent(), ++MI.getIterator());
        // ...
      }

      // Re-insert the constant offset add inside the waterfall loop.
      if (ShouldMoveIndexIntoLoop)
        reinsertVectorIndexAdd(B, MI, 2, ConstOffset);

      return;
    }
    // ...
    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);
    // ...
    // Compute 32-bit element indices: (2 * OrigIdx, 2 * OrigIdx + 1).
    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto Extract0 = B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
    auto Extract1 = B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);

    MRI.setRegBank(DstReg, *DstBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
    // ...
      MI.eraseFromParent();
      return;
    }

    // ...
    B.setInstr(*Span.begin());
    MI.eraseFromParent();
    // ...
    if (NeedCopyToVGPR) {
      // ...
      MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
      MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);

      Extract0->getOperand(0).setReg(TmpReg0);
      Extract1->getOperand(0).setReg(TmpReg1);
      // ...
    }

    if (ShouldMoveIndexIntoLoop)
      reinsertVectorIndexAdd(B, MI, 2, ConstOffset);

    return;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    // ...
    Register DstReg = MI.getOperand(0).getReg();
    LLT VecTy = MRI.getType(DstReg);
    // ...
    MRI.setType(MI.getOperand(1).getReg(), VecTy);

    if (foldInsertEltToCmpSelect(B, MI, OpdMapper))
      return;
    // ...
    Register InsReg = MI.getOperand(2).getReg();
    LLT InsTy = MRI.getType(InsReg);
    // ...
    unsigned ConstOffset;
    std::tie(BaseIdxReg, ConstOffset) =
        AMDGPU::getBaseWithConstantOffset(MRI, MI.getOperand(3).getReg());
    // ...
    bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
                                   // ...

    // Move the base register. We'll re-insert the add later.
    if (ShouldMoveIndexIntoLoop)
      MI.getOperand(3).setReg(BaseIdxReg);

    // ...
    if (InsRegs.empty()) {
      // ...
      // Re-insert the constant offset add inside the waterfall loop.
      if (ShouldMoveIndexIntoLoop) {
        reinsertVectorIndexAdd(B, MI, 3, ConstOffset);
      }

      return;
    }
    // ...
    auto CastSrc = B.buildBitcast(Vec32, SrcReg);
    auto One = B.buildConstant(S32, 1);
    // ...
    auto IdxLo = B.buildShl(S32, BaseIdxReg, One);
    auto IdxHi = B.buildAdd(S32, IdxLo, One);

    auto InsLo = B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
    auto InsHi = B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
    // ...
    MRI.setRegBank(InsReg, *InsSrcBank);
    MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
    MRI.setRegBank(InsLo.getReg(0), *DstBank);
    MRI.setRegBank(InsHi.getReg(0), *DstBank);
    MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
    MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
    // ...
      B.setInsertPt(B.getMBB(), MI);
      B.buildBitcast(DstReg, InsHi);
      MI.eraseFromParent();
      return;
    }

    B.setInstr(*Span.begin());
    MI.eraseFromParent();
    // ...
    B.buildBitcast(DstReg, InsHi);
    // ...
    if (ShouldMoveIndexIntoLoop)
      reinsertVectorIndexAdd(B, MI, 3, ConstOffset);

    return;
  }
  case AMDGPU::G_AMDGPU_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_BUFFER_STORE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
    // ...
  }
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
    // ...
  }
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
    // ...
  }
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
    // ...
  }
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
    applyMappingSBufferLoad(B, OpdMapper);
    return;
  }
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    case Intrinsic::amdgcn_readlane: {
      // ...
    }
    case Intrinsic::amdgcn_writelane: {
      // ...
    }
    case Intrinsic::amdgcn_interp_p1:
    case Intrinsic::amdgcn_interp_p2:
    case Intrinsic::amdgcn_interp_mov:
    case Intrinsic::amdgcn_interp_p1_f16:
    case Intrinsic::amdgcn_interp_p2_f16:
    case Intrinsic::amdgcn_lds_param_load: {
      // ...
    }
    case Intrinsic::amdgcn_interp_inreg_p10:
    case Intrinsic::amdgcn_interp_inreg_p2:
    case Intrinsic::amdgcn_interp_inreg_p10_f16:
    case Intrinsic::amdgcn_interp_inreg_p2_f16:
    case Intrinsic::amdgcn_interp_p10_rtz_f16:
    case Intrinsic::amdgcn_interp_p2_rtz_f16:
      // ...
    case Intrinsic::amdgcn_permlane16:
    case Intrinsic::amdgcn_permlanex16: {
      // ...
    }
    case Intrinsic::amdgcn_sbfe:
      applyMappingBFE(B, OpdMapper, true);
      return;
    case Intrinsic::amdgcn_ubfe:
      applyMappingBFE(B, OpdMapper, false);
      return;
    case Intrinsic::amdgcn_inverse_ballot:
    case Intrinsic::amdgcn_s_bitreplicate:
    case Intrinsic::amdgcn_s_quadmask:
    case Intrinsic::amdgcn_s_wqm:
      // ...
    case Intrinsic::amdgcn_ballot:
      // ...
    }
    break;
  }
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
  case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
    // ...
  }
  case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
    unsigned N = MI.getNumExplicitOperands() - 2;
    applyDefaultMapping(OpdMapper);
    executeInWaterfallLoop(B, MI, {N});
    return;
  }
  case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
  case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
    auto IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    switch (IntrID) {
    case Intrinsic::amdgcn_ds_ordered_add:
    case Intrinsic::amdgcn_ds_ordered_swap: {
      // ...
    }
    case Intrinsic::amdgcn_ds_gws_init:
    case Intrinsic::amdgcn_ds_gws_barrier:
    case Intrinsic::amdgcn_ds_gws_sema_br: {
      // ...
    }
    case Intrinsic::amdgcn_ds_gws_sema_v:
    case Intrinsic::amdgcn_ds_gws_sema_p:
    case Intrinsic::amdgcn_ds_gws_sema_release_all: {
      // ...
    }
    case Intrinsic::amdgcn_ds_append:
    case Intrinsic::amdgcn_ds_consume: {
      // ...
    }
    case Intrinsic::amdgcn_s_sendmsg:
    case Intrinsic::amdgcn_s_sendmsghalt: {
      // ...
    }
    case Intrinsic::amdgcn_s_setreg: {
      // ...
    }
    case Intrinsic::amdgcn_s_ttracedata:
      // ...
    case Intrinsic::amdgcn_raw_buffer_load_lds:
    case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
      // ...
    }
    case Intrinsic::amdgcn_struct_buffer_load_lds:
    case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
      // ...
    }
    case Intrinsic::amdgcn_global_load_lds: {
      // ...
    }
    case Intrinsic::amdgcn_lds_direct_load: {
      // ...
    }
    case Intrinsic::amdgcn_exp_row:
      // ...
    case Intrinsic::amdgcn_s_sleep_var:
      // ...
    case Intrinsic::amdgcn_s_barrier_signal_var:
    case Intrinsic::amdgcn_s_barrier_join:
    case Intrinsic::amdgcn_s_wakeup_barrier:
      // ...
    case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
      // ...
    case Intrinsic::amdgcn_s_barrier_init:
      // ...
    case Intrinsic::amdgcn_s_get_barrier_state: {
      // ...
    }
    // ...
      if (RSrcIntrin->IsImage) {
        // ...
      }
    // ...
  }
  case AMDGPU::G_SI_CALL: {
    // ...
    unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
    unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
    // ...
    unsigned NonCopyInstrsLen = 0;
    // ...
    while (Start->getOpcode() != FrameSetupOpcode) {
      --Start;
      bool IsCopy = false;
      if (Start->getOpcode() == AMDGPU::COPY) {
        auto &Dst = Start->getOperand(0);
        if (Dst.isReg()) {
          Register Reg = Dst.getReg();
          if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {
            IsCopy = true;
          } else {
            // Also move the copy from the scratch rsrc descriptor into the
            // loop to allow it to be optimized away.
            auto &Src = Start->getOperand(1);
            if (Src.isReg()) {
              Reg = Src.getReg();
              IsCopy = Info->getScratchRSrcReg() == Reg;
            }
          }
        }
      }

      if (IsCopy) {
        LastCopy = Start;
        NonCopyInstrsLen = NonCopyInstrs.size();
      } else {
        NonCopyInstrs.push_back(&*Start);
      }
    }
    Start = LastCopy;

    NonCopyInstrs.resize(NonCopyInstrsLen);

    for (auto *NonCopy : reverse(NonCopyInstrs)) {
      // ...
    }

    // Do the same for the end of the region.
    NonCopyInstrs.clear();
    NonCopyInstrsLen = 0;
    // ...
    while (End->getOpcode() != FrameDestroyOpcode) {
      ++End;
      bool IsCopy = false;
      if (End->getOpcode() == AMDGPU::COPY) {
        auto &Src = End->getOperand(1);
        if (Src.isReg()) {
          Register Reg = Src.getReg();
          IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);
        }
      }

      if (IsCopy) {
        LastCopy = End;
        NonCopyInstrsLen = NonCopyInstrs.size();
      } else {
        NonCopyInstrs.push_back(&*End);
      }
    }
    End = LastCopy;

    NonCopyInstrs.resize(NonCopyInstrsLen);

    for (auto *NonCopy : reverse(NonCopyInstrs)) {
      // ...
    }

    B.setInsertPt(B.getMBB(), Start);
    // ...
  }
  case AMDGPU::G_LOAD:
  case AMDGPU::G_ZEXTLOAD:
  case AMDGPU::G_SEXTLOAD: {
    if (applyMappingLoad(B, OpdMapper, MI))
      return;
    break;
  }
  case AMDGPU::G_DYN_STACKALLOC:
    applyMappingDynStackAlloc(B, OpdMapper, MI);
    return;
  case AMDGPU::G_STACKRESTORE: {
    applyDefaultMapping(OpdMapper);
    constrainOpWithReadfirstlane(B, MI, 0);
    return;
  }
  case AMDGPU::G_SBFX:
    applyMappingBFE(B, OpdMapper, /*Signed*/ true);
    return;
  case AMDGPU::G_UBFX:
    applyMappingBFE(B, OpdMapper, /*Signed*/ false);
    return;
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32:
    applyMappingMAD_64_32(B, OpdMapper);
    return;
  case AMDGPU::G_PREFETCH: {
    if (/* ... */) {
      MI.eraseFromParent();
      return;
    }
    // ...
    if (PtrBank == AMDGPU::VGPRRegBankID) {
      MI.eraseFromParent();
      return;
    }
    unsigned AS = MRI.getType(PtrReg).getAddressSpace();
    if (/* ... */) {
      MI.eraseFromParent();
      return;
    }
    // ...
  }
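// Bank-union helpers used by the default mappings: any VGPR input forces a
// VGPR result, and VCC wins for boolean unions.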
static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
  if (RB0 == AMDGPU::InvalidRegBankID)
    return RB1;
  if (RB1 == AMDGPU::InvalidRegBankID)
    return RB0;

  if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
    return AMDGPU::SGPRRegBankID;

  if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
    return AMDGPU::AGPRRegBankID;

  return AMDGPU::VGPRRegBankID;
}

static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1) {
  if (RB0 == AMDGPU::InvalidRegBankID)
    return RB1;
  if (RB1 == AMDGPU::InvalidRegBankID)
    return RB0;

  // vcc, vcc -> vcc
  // vcc, sgpr -> vcc
  // vcc, vgpr -> vcc
  if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
    return AMDGPU::VCCRegBankID;
  // ...
}

  unsigned RegBank = AMDGPU::InvalidRegBankID;
  // ...
    if (RegBank == AMDGPU::VGPRRegBankID)
      break;
  // ...
    if (Bank->getID() != AMDGPU::SGPRRegBankID)
      return false;
  // ...
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    // ...
    OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
  // ...
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    // ...
    unsigned BankID = Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
    OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
}
  // ...
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    // ...
    OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }
  return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping),
                               MI.getNumOperands());
                                        int RsrcIdx) const {
  // The reported argument index is relative to the IR intrinsic call
  // arguments, so we need to shift by the number of defs and the intrinsic ID.
  RsrcIdx += MI.getNumExplicitDefs() + 1;

  const int NumOps = MI.getNumOperands();
  // ...
  for (int I = 0; I != NumOps; ++I) {
    if (!MI.getOperand(I).isReg())
      continue;
    // ...
    const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
    // ...
      OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
    // ...
      OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  }
  // ...

  LLT PtrTy = MRI.getType(PtrReg);
  // ...
    return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
  // ...
  return AMDGPU::getValueMapping(PtrBank->getID(), Size);
}
  // ...
  LLT PtrTy = MRI.getType(PtrReg);
  // ...
    ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
  } else {
    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    // ...
        AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
    PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
  }
  // ...
    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
  }

  OpdsMapping[0] = ValMapping;
  OpdsMapping[1] = PtrMapping;
  // ...
  return AMDGPU::getValueMapping(Bank, Size);
}
  // ...
  return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
  // ...
  return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID, Size);
}
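// getInstrMapping: compute the default (cost-1) mapping for an instruction,
// mostly by inferring SGPR vs. VGPR from the operands' existing banks.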
  if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) {
    // ...
    assert(SrcBank && "src bank should have been assigned already");
    // ...
      DstBank = &AMDGPU::VCCRegBank;
    // ...
      DstBank = &AMDGPU::VCCRegBank;
    // ...
    if (MI.getOpcode() != AMDGPU::G_FREEZE &&
        cannotCopy(*DstBank, *SrcBank, TypeSize::getFixed(Size)))
      return getInvalidInstructionMapping();

    unsigned OpdsMappingSize = MI.isCopy() ? 1 : 2;
    // ...
    OpdsMapping[0] = &ValMap;
    if (MI.getOpcode() == AMDGPU::G_FREEZE)
      OpdsMapping[1] = &ValMap;
    // ...
  }

  if (MI.isRegSequence()) {
    // If any input is a VGPR, the result must be a VGPR. The default handling
    // assumes any copy between banks is legal.
    unsigned BankID = AMDGPU::SGPRRegBankID;

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
      // ...
      if (OpBank != AMDGPU::SGPRRegBankID) {
        BankID = AMDGPU::VGPRRegBankID;
        break;
      }
    }
    // ...
  }

  // ...
  if (auto *PHI = dyn_cast<GPhi>(&MI)) {
    unsigned ResultBank = AMDGPU::InvalidRegBankID;
    // ...
      ResultBank = DstBank->getID();

    for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) {
      // ...
      if (!Bank || Bank->getID() == AMDGPU::VGPRRegBankID) {
        ResultBank = AMDGPU::VGPRRegBankID;
        break;
      }
      // ...
      unsigned OpBank = Bank->getID();
      // ...
    }

    assert(ResultBank != AMDGPU::InvalidRegBankID);

    unsigned Size = MRI.getType(DstReg).getSizeInBits();
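  // Opcode-specific defaults follow. Boolean (s1) results generally live in
  // VCC unless every input is known to be uniform.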
  switch (MI.getOpcode()) {
  default:
    return getInvalidInstructionMapping();
  // ...
  case AMDGPU::G_MUL: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (Size == 1) {
      // ...
      unsigned TargetBankID = AMDGPU::InvalidRegBankID;
      unsigned BankLHS = AMDGPU::InvalidRegBankID;
      unsigned BankRHS = AMDGPU::InvalidRegBankID;
      if (DstBank) {
        TargetBankID = DstBank->getID();
        if (DstBank == &AMDGPU::VCCRegBank) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;
        } else {
          BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
                                 AMDGPU::SGPRRegBankID);
          BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
                                 AMDGPU::SGPRRegBankID);
        }
      } else {
        BankLHS = getRegBankID(MI.getOperand(1).getReg(), MRI,
                               AMDGPU::VCCRegBankID);
        BankRHS = getRegBankID(MI.getOperand(2).getReg(), MRI,
                               AMDGPU::VCCRegBankID);

        // Both inputs should be true booleans to produce a boolean result.
        if (BankLHS == AMDGPU::VGPRRegBankID ||
            BankRHS == AMDGPU::VGPRRegBankID) {
          TargetBankID = AMDGPU::VGPRRegBankID;
        } else if (BankLHS == AMDGPU::VCCRegBankID ||
                   BankRHS == AMDGPU::VCCRegBankID) {
          TargetBankID = AMDGPU::VCCRegBankID;
          BankLHS = AMDGPU::VCCRegBankID;
          BankRHS = AMDGPU::VCCRegBankID;
        } else if (BankLHS == AMDGPU::SGPRRegBankID &&
                   BankRHS == AMDGPU::SGPRRegBankID) {
          TargetBankID = AMDGPU::SGPRRegBankID;
        }
      }

      OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID, Size);
      OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS, Size);
      break;
    }

    if (Size == 64) {
      if (isSALUMapping(MI)) {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID, Size);
        OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
      } else {
        OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID, Size);
        // ...
        OpdsMapping[1] = AMDGPU::getValueMapping(Bank1, Size);
        // ...
        OpdsMapping[2] = AMDGPU::getValueMapping(Bank2, Size);
      }

      break;
    }

    [[fallthrough]];
  }
  case AMDGPU::G_PTR_ADD:
  case AMDGPU::G_PTRMASK:
  // ...
  case AMDGPU::G_LSHR:
  case AMDGPU::G_ASHR:
  case AMDGPU::G_UADDO:
  case AMDGPU::G_USUBO:
  case AMDGPU::G_UADDE:
  case AMDGPU::G_SADDE:
  case AMDGPU::G_USUBE:
  case AMDGPU::G_SSUBE:
  case AMDGPU::G_SMIN:
  case AMDGPU::G_SMAX:
  case AMDGPU::G_UMIN:
  case AMDGPU::G_UMAX:
  // ...
  case AMDGPU::G_SHUFFLE_VECTOR:
  case AMDGPU::G_SBFX:
  case AMDGPU::G_UBFX:
  case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
  case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
    if (isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  case AMDGPU::G_FADD:
  case AMDGPU::G_FSUB:
  case AMDGPU::G_FMUL:
  // ...
  case AMDGPU::G_FFLOOR:
  case AMDGPU::G_FCEIL:
  case AMDGPU::G_INTRINSIC_ROUNDEVEN:
  case AMDGPU::G_FMINNUM:
  case AMDGPU::G_FMAXNUM:
  case AMDGPU::G_FMINIMUM:
  case AMDGPU::G_FMAXIMUM:
  case AMDGPU::G_INTRINSIC_TRUNC:
  case AMDGPU::G_STRICT_FADD:
  case AMDGPU::G_STRICT_FSUB:
  case AMDGPU::G_STRICT_FMUL:
  case AMDGPU::G_STRICT_FMA: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    unsigned Size = Ty.getSizeInBits();
    if (Subtarget.hasSALUFloatInsts() && Ty.isScalar() &&
        (Size == 32 || Size == 16) && isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_FPTOSI:
  case AMDGPU::G_FPTOUI:
  case AMDGPU::G_SITOFP:
  case AMDGPU::G_UITOFP: {
    unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (Subtarget.hasSALUFloatInsts() && SizeDst == 32 && SizeSrc == 32 &&
        isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_FPTRUNC:
  case AMDGPU::G_FPEXT: {
    unsigned SizeDst = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SizeSrc = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (Subtarget.hasSALUFloatInsts() && SizeDst != 64 && SizeSrc != 64 &&
        isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_FSQRT:
  case AMDGPU::G_FEXP2:
  case AMDGPU::G_FLOG2: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
        isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_SADDSAT:
  case AMDGPU::G_SSUBSAT:
  case AMDGPU::G_UADDSAT:
  case AMDGPU::G_USUBSAT:
  case AMDGPU::G_FMAD:
  case AMDGPU::G_FLDEXP:
  case AMDGPU::G_FMINNUM_IEEE:
  case AMDGPU::G_FMAXNUM_IEEE:
  case AMDGPU::G_FCANONICALIZE:
  case AMDGPU::G_STRICT_FLDEXP:
  case AMDGPU::G_BSWAP:
  case AMDGPU::G_FSHR:
  case AMDGPU::G_AMDGPU_FMIN_LEGACY:
  case AMDGPU::G_AMDGPU_FMAX_LEGACY:
  case AMDGPU::G_AMDGPU_RCP_IFLAG:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
  case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
  case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
  case AMDGPU::G_AMDGPU_SMED3:
  case AMDGPU::G_AMDGPU_FMED3:
    return getDefaultMappingVOP(MI);
  case AMDGPU::G_UMULH:
  case AMDGPU::G_SMULH: {
    if (Subtarget.hasScalarMulHiInsts() && isSALUMapping(MI))
      return getDefaultMappingSOP(MI);
    return getDefaultMappingVOP(MI);
  }
  case AMDGPU::G_AMDGPU_MAD_U64_U32:
  case AMDGPU::G_AMDGPU_MAD_I64_I32: {
    // Three possible mappings: default SOP, default VOP, or a scalar multiply
    // with the accumulate on the VALU.
    bool AllSalu = true;
    bool MulSalu = true;
    for (unsigned i = 0; i < 5; ++i) {
      const RegisterBank *Bank =
          getRegBank(MI.getOperand(i).getReg(), MRI, *TRI);
      if (Bank) {
        if (Bank->getID() != AMDGPU::SGPRRegBankID) {
          AllSalu = false;
          if (i == 2 || i == 3) {
            MulSalu = false;
            break;
          }
        }
      }
    }

    if (AllSalu)
      return getDefaultMappingSOP(MI);

    // If the multiply-add is full-rate in VALU, use that even if the
    // multiplication part is scalar. Accumulating separately on the VALU
    // would take two instructions.
    if (!MulSalu || Subtarget.hasFullRate64Ops())
      return getDefaultMappingVOP(MI);

    // Keep the multiplication on the SALU, then accumulate on the VALU.
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
    OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
    break;
  }
4067 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4068 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4071 case AMDGPU::G_FCONSTANT:
4072 case AMDGPU::G_CONSTANT:
4073 case AMDGPU::G_GLOBAL_VALUE:
4074 case AMDGPU::G_FRAME_INDEX:
4075 case AMDGPU::G_BLOCK_ADDR:
4076 case AMDGPU::G_READSTEADYCOUNTER:
4077 case AMDGPU::G_READCYCLECOUNTER: {
4078 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4079 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4082 case AMDGPU::G_DYN_STACKALLOC: {
4084 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4086 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
  case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
    // This case is weird because we expect a physical register in the source,
    // but need to set a bank anyway.
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
    break;
  }
  case AMDGPU::G_INSERT: {
    unsigned BankID = getMappingType(MRI, MI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    unsigned EltSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
    OpdsMapping[3] = nullptr;
    break;
  }
  case AMDGPU::G_EXTRACT: {
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    unsigned DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
    unsigned SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
    OpdsMapping[2] = nullptr;
    break;
  }
  case AMDGPU::G_BUILD_VECTOR:
  case AMDGPU::G_BUILD_VECTOR_TRUNC: {
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    if (DstTy == LLT::fixed_vector(2, 16)) {
      unsigned DstSize = DstTy.getSizeInBits();
      unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
      unsigned Src0BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
      unsigned Src1BankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
      unsigned DstBankID = regBankUnion(Src0BankID, Src1BankID);

      OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
      OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
      OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
      break;
    }

    [[fallthrough]];
  }
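  // A v2s16 build_vector can be selected directly as a packed operation, so
  // the sources above keep their banks and the result takes their union;
  // every other shape falls through to the generic merge handling below.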
  case AMDGPU::G_MERGE_VALUES:
  case AMDGPU::G_CONCAT_VECTORS: {
    unsigned Bank = getMappingType(MRI, MI);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    // Op1 and Dst should use the same register bank.
    for (unsigned i = 1, e = MI.getNumOperands(); i != e; ++i)
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_BITREVERSE:
  case AMDGPU::G_BITCAST:
  case AMDGPU::G_INTTOPTR:
  case AMDGPU::G_PTRTOINT:
  case AMDGPU::G_FABS:
  case AMDGPU::G_FNEG: {
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_AMDGPU_FFBH_U32:
  case AMDGPU::G_AMDGPU_FFBL_B32:
  case AMDGPU::G_CTLZ_ZERO_UNDEF:
  case AMDGPU::G_CTTZ_ZERO_UNDEF: {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
    OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
    break;
  }
  case AMDGPU::G_CTPOP: {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);

    // This should really be getValueMappingSGPR64Only, but allowing the
    // generic code to handle the register split just makes using
    // LegalizerHelper more difficult.
    OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
    break;
  }
  case AMDGPU::G_TRUNC: {
    Register Dst = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    unsigned Bank = getRegBankID(Src, MRI);
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);
    OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
    break;
  }
  case AMDGPU::G_ZEXT:
  case AMDGPU::G_SEXT:
  case AMDGPU::G_ANYEXT:
  case AMDGPU::G_SEXT_INREG: {
    Register Dst = MI.getOperand(0).getReg();
    unsigned DstSize = getSizeInBits(Dst, MRI, *TRI);
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcSize = getSizeInBits(Src, MRI, *TRI);

    unsigned DstBank;
    const RegisterBank *SrcBank = getRegBank(Src, MRI, *TRI);
    assert(SrcBank);
    switch (SrcBank->getID()) {
    case AMDGPU::SGPRRegBankID:
      DstBank = AMDGPU::SGPRRegBankID;
      break;
    default:
      DstBank = AMDGPU::VGPRRegBankID;
      break;
    }

    // Scalar extend can use 64-bit BFE, but VGPRs require extending to
    // 32-bits, and then to 64.
    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
                                                       SrcSize);
    break;
  }
  case AMDGPU::G_IS_FPCLASS: {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
    break;
  }
  case AMDGPU::G_STORE: {
    assert(MI.getOperand(0).isReg());
    unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

    // FIXME: We need to specify a different reg bank once scalar stores are
    // supported.
    const ValueMapping *ValMapping =
        AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
    OpdsMapping[0] = ValMapping;
    OpdsMapping[1] = getValueMappingForPtr(MRI, MI.getOperand(1).getReg());
    break;
  }
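  // Compares can produce their result either in SCC (scalar compare on
  // wholly uniform operands) or as a lane mask in VCC (vector compare). As an
  // illustration, a uniform compare such as
  //   %c:_(s1) = G_ICMP intpred(eq), %a:sgpr(s32), %b:sgpr(s32)
  // can select to S_CMP_EQ_U32 writing SCC, while a divergent one must become
  // a V_CMP writing a lane mask.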
  case AMDGPU::G_ICMP:
  case AMDGPU::G_FCMP: {
    unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();

    // See if the result register has already been constrained to vcc, which
    // may happen due to control flow intrinsic lowering.
    unsigned DstBank = getRegBankID(MI.getOperand(0).getReg(), MRI,
                                    AMDGPU::SGPRRegBankID);
    unsigned Op2Bank = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned Op3Bank = getRegBankID(MI.getOperand(3).getReg(), MRI);

    auto canUseSCCICMP = [&]() {
      auto Pred =
          static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
      return Size == 32 ||
             (Size == 64 &&
              (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
              Subtarget.hasScalarCompareEq64());
    };
    auto canUseSCCFCMP = [&]() {
      return Subtarget.hasSALUFloatInsts() && (Size == 32 || Size == 16);
    };

    bool isICMP = MI.getOpcode() == AMDGPU::G_ICMP;
    bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
                     Op2Bank == AMDGPU::SGPRRegBankID &&
                     Op3Bank == AMDGPU::SGPRRegBankID &&
                     (isICMP ? canUseSCCICMP() : canUseSCCFCMP());

    DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
    unsigned SrcBank =
        CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    // TODO: Use 32-bit for scalar output size.
    // SCC results will need to be copied to a 32-bit SGPR virtual register.
    const unsigned ResultSize = 1;

    OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
    OpdsMapping[1] = nullptr; // Predicate Operand.
    OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, Size);
    OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank, Size);
    break;
  }
  case AMDGPU::G_EXTRACT_VECTOR_ELT: {
    // VGPR index can be used for waterfall when indexing a SGPR vector.
    unsigned SrcBankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
    unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned OutputBankID = regBankUnion(SrcBankID, IdxBank);

    OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);

    // The index can be either if the source vector is VGPR.
    OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
    break;
  }
  case AMDGPU::G_INSERT_VECTOR_ELT: {
    unsigned OutputBankID = isSALUMapping(MI) ?
      AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;

    unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    unsigned InsertSize =
        MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
    unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
    unsigned InsertEltBankID = getRegBankID(MI.getOperand(2).getReg(), MRI);
    unsigned IdxBankID = getRegBankID(MI.getOperand(3).getReg(), MRI);

    OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
    OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);

    // This is a weird case, because we need to break down the mapping based
    // on the register bank of a different operand.
    if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
      OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
                                                      InsertSize);
    } else {
      assert(InsertSize == 32 || InsertSize == 64);
      OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
    }

    // The index can be either if the source vector is VGPR.
    OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
    break;
  }
  case AMDGPU::G_UNMERGE_VALUES: {
    unsigned Bank = getMappingType(MRI, MI);

    // Op1 and Dst should use the same register bank.
    // FIXME: Shouldn't this be the default? Why do we need to handle this?
    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
      unsigned Size = MRI.getType(MI.getOperand(i).getReg()).getSizeInBits();
      OpdsMapping[i] = AMDGPU::getValueMapping(Bank, Size);
    }
    break;
  }
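  // Buffer operations. The resource descriptor (rsrc) and scalar offset
  // (soffset) must ultimately be uniform, so they map to SGPRs; the per-lane
  // index, offset, and data operands live in VGPRs. If a nominally scalar
  // operand was assigned a VGPR, applyMapping later rewrites the instruction
  // inside a waterfall loop.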
  case AMDGPU::G_AMDGPU_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
  case AMDGPU::G_AMDGPU_BUFFER_STORE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
  case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);

    // rsrc
    OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);

    // vindex
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

    // voffset
    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);

    // soffset
    OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);

    // Any remaining operands are immediates and were correctly null
    // initialized.
    break;
  }
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
    // vdata_out
    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    // vdata_in
    OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
    // rsrc
    OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    // vindex
    OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    // voffset
    OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    // soffset
    OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);

    // Any remaining operands are immediates and were correctly null
    // initialized.
    break;
  }
  case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
    // vdata_out
    OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
    // vdata_in
    OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
    // cmp
    OpdsMapping[2] = getVGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
    // rsrc
    OpdsMapping[3] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
    // vindex
    OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
    // voffset
    OpdsMapping[5] = getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
    // soffset
    OpdsMapping[6] = getSGPROpMapping(MI.getOperand(6).getReg(), MRI, *TRI);

    // Any remaining operands are immediates and were correctly null
    // initialized.
    break;
  }
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
    // Lie and claim everything is legal, even though some need to be
    // SGPRs. applyMapping will have to deal with it as a waterfall loop.
    OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
    OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);

    unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
    unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
    unsigned ResultBank = regBankUnion(RSrcBank, OffsetBank);

    unsigned Size0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
    break;
  }
  case AMDGPU::G_INTRINSIC:
  case AMDGPU::G_INTRINSIC_CONVERGENT: {
    switch (cast<GIntrinsic>(MI).getIntrinsicID()) {
    default:
      return getInvalidInstructionMapping();
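    // Plain arithmetic intrinsics with no special operand constraints map to
    // the default VALU pattern: every register operand in a VGPR.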
    case Intrinsic::amdgcn_div_fmas:
    case Intrinsic::amdgcn_div_fixup:
    case Intrinsic::amdgcn_trig_preop:
    case Intrinsic::amdgcn_sin:
    case Intrinsic::amdgcn_cos:
    case Intrinsic::amdgcn_log_clamp:
    case Intrinsic::amdgcn_rcp_legacy:
    case Intrinsic::amdgcn_rsq_legacy:
    case Intrinsic::amdgcn_rsq_clamp:
    case Intrinsic::amdgcn_fmul_legacy:
    case Intrinsic::amdgcn_fma_legacy:
    case Intrinsic::amdgcn_frexp_mant:
    case Intrinsic::amdgcn_frexp_exp:
    case Intrinsic::amdgcn_fract:
    case Intrinsic::amdgcn_cvt_pknorm_i16:
    case Intrinsic::amdgcn_cvt_pknorm_u16:
    case Intrinsic::amdgcn_cvt_pk_i16:
    case Intrinsic::amdgcn_cvt_pk_u16:
    case Intrinsic::amdgcn_fmed3:
    case Intrinsic::amdgcn_cubeid:
    case Intrinsic::amdgcn_cubema:
    case Intrinsic::amdgcn_cubesc:
    case Intrinsic::amdgcn_cubetc:
    case Intrinsic::amdgcn_sffbh:
    case Intrinsic::amdgcn_fmad_ftz:
    case Intrinsic::amdgcn_mbcnt_lo:
    case Intrinsic::amdgcn_mbcnt_hi:
    case Intrinsic::amdgcn_mul_u24:
    case Intrinsic::amdgcn_mul_i24:
    case Intrinsic::amdgcn_mulhi_u24:
    case Intrinsic::amdgcn_mulhi_i24:
    case Intrinsic::amdgcn_lerp:
    case Intrinsic::amdgcn_sad_u8:
    case Intrinsic::amdgcn_msad_u8:
    case Intrinsic::amdgcn_sad_hi_u8:
    case Intrinsic::amdgcn_sad_u16:
    case Intrinsic::amdgcn_qsad_pk_u16_u8:
    case Intrinsic::amdgcn_mqsad_pk_u16_u8:
    case Intrinsic::amdgcn_mqsad_u32_u8:
    case Intrinsic::amdgcn_cvt_pk_u8_f32:
    case Intrinsic::amdgcn_alignbyte:
    case Intrinsic::amdgcn_perm:
    case Intrinsic::amdgcn_fdot2:
    case Intrinsic::amdgcn_sdot2:
    case Intrinsic::amdgcn_udot2:
    case Intrinsic::amdgcn_sdot4:
    case Intrinsic::amdgcn_udot4:
    case Intrinsic::amdgcn_sdot8:
    case Intrinsic::amdgcn_udot8:
    case Intrinsic::amdgcn_fdot2_bf16_bf16:
    case Intrinsic::amdgcn_fdot2_f16_f16:
    case Intrinsic::amdgcn_fdot2_f32_bf16:
    case Intrinsic::amdgcn_sudot4:
    case Intrinsic::amdgcn_sudot8:
    case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
    case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
    case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
    case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
    case Intrinsic::amdgcn_cvt_f32_fp8:
    case Intrinsic::amdgcn_cvt_f32_bf8:
    case Intrinsic::amdgcn_cvt_pk_f32_fp8:
    case Intrinsic::amdgcn_cvt_pk_f32_bf8:
    case Intrinsic::amdgcn_cvt_pk_fp8_f32:
    case Intrinsic::amdgcn_cvt_pk_bf8_f32:
    case Intrinsic::amdgcn_cvt_sr_fp8_f32:
    case Intrinsic::amdgcn_cvt_sr_bf8_f32:
    case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
    case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
    case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
    case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
    case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
    case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
    case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
    case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
    case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
    case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
    case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
    case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
    case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
    case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
      return getDefaultMappingVOP(MI);
    case Intrinsic::amdgcn_log:
    case Intrinsic::amdgcn_exp2:
    case Intrinsic::amdgcn_rcp:
    case Intrinsic::amdgcn_rsq:
    case Intrinsic::amdgcn_sqrt: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      if (Subtarget.hasPseudoScalarTrans() && (Size == 16 || Size == 32) &&
          isSALUMapping(MI))
        return getDefaultMappingSGPR(MI);
      return getDefaultMappingVOP(MI);
    }
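    // Bitfield extract has both scalar and vector forms, so a fully uniform
    // operation can stay on the SALU.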
    case Intrinsic::amdgcn_sbfe:
    case Intrinsic::amdgcn_ubfe:
      if (isSALUMapping(MI))
        return getDefaultMappingSGPR(MI);
      return getDefaultMappingVOP(MI);
    case Intrinsic::amdgcn_ds_swizzle:
    case Intrinsic::amdgcn_ds_permute:
    case Intrinsic::amdgcn_ds_bpermute:
    case Intrinsic::amdgcn_update_dpp:
    case Intrinsic::amdgcn_mov_dpp8:
    case Intrinsic::amdgcn_mov_dpp:
    case Intrinsic::amdgcn_strict_wwm:
    case Intrinsic::amdgcn_wwm:
    case Intrinsic::amdgcn_strict_wqm:
    case Intrinsic::amdgcn_wqm:
    case Intrinsic::amdgcn_softwqm:
    case Intrinsic::amdgcn_set_inactive:
    case Intrinsic::amdgcn_set_inactive_chain_arg:
    case Intrinsic::amdgcn_permlane64:
      return getDefaultMappingAllVGPR(MI);
    case Intrinsic::amdgcn_cvt_pkrtz:
      if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
        return getDefaultMappingSGPR(MI);
      return getDefaultMappingVOP(MI);
    case Intrinsic::amdgcn_kernarg_segment_ptr:
    case Intrinsic::amdgcn_s_getpc:
    case Intrinsic::amdgcn_groupstaticsize:
    case Intrinsic::amdgcn_reloc_constant:
    case Intrinsic::returnaddress: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_wqm_vote: {
      unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = OpdsMapping[2]
        = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_ps_live: {
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      break;
    }
    case Intrinsic::amdgcn_div_scale: {
      unsigned Dst0Size =
          MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned Dst1Size =
          MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
      OpdsMapping[0] =
          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
      OpdsMapping[1] =
          AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);

      unsigned SrcSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits();
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      break;
    }
    case Intrinsic::amdgcn_class: {
      Register Src0Reg = MI.getOperand(2).getReg();
      Register Src1Reg = MI.getOperand(3).getReg();
      unsigned Src0Size = MRI.getType(Src0Reg).getSizeInBits();
      unsigned Src1Size = MRI.getType(Src1Reg).getSizeInBits();
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
      OpdsMapping[2] =
          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
      OpdsMapping[3] =
          AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
      break;
    }
    case Intrinsic::amdgcn_icmp:
    case Intrinsic::amdgcn_fcmp: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      // This is not VCCRegBank because this is not used in boolean contexts.
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
      unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
      break;
    }
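    // Lane-access intrinsics read or write a single lane of a VGPR, so the
    // lane index (and writelane's scalar source) must end up uniform. As an
    // illustration, a call such as
    //   %v = call i32 @llvm.amdgcn.readlane(i32 %data, i32 %lane)
    // needs %lane in an SGPR; a readfirstlane is inserted later if it is not.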
    case Intrinsic::amdgcn_readlane: {
      // This must be an SGPR, but accept a VGPR.
      Register IdxReg = MI.getOperand(3).getReg();
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
      unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
      [[fallthrough]];
    }
    case Intrinsic::amdgcn_readfirstlane: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      break;
    }
    case Intrinsic::amdgcn_writelane: {
      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
      Register SrcReg = MI.getOperand(2).getReg();
      unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
      unsigned SrcBank = getRegBankID(SrcReg, MRI, AMDGPU::SGPRRegBankID);
      Register IdxReg = MI.getOperand(3).getReg();
      unsigned IdxSize = MRI.getType(IdxReg).getSizeInBits();
      unsigned IdxBank = getRegBankID(IdxReg, MRI, AMDGPU::SGPRRegBankID);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);

      // These 2 must be SGPRs, but accept VGPRs. Readfirstlane will be
      // inserted to legalize.
      OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
      OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
      break;
    }
    case Intrinsic::amdgcn_if_break: {
      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
      break;
    }
    case Intrinsic::amdgcn_permlane16:
    case Intrinsic::amdgcn_permlanex16: {
      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
      OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
      break;
    }
    case Intrinsic::amdgcn_permlane16_var:
    case Intrinsic::amdgcn_permlanex16_var: {
      unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
      break;
    }
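    // MFMA (matrix fused multiply-add) intrinsics. The operands live in
    // VGPRs, except that on subtargets where the accumulator may need to be
    // held in AGPRs, vdst and srcC map to the AGPR bank instead.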
    case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
    case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
    case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
    case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
    case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
    case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
    case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
    case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
    case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
    case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
    case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
    case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
    case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
    case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
    case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
    case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
    case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
    case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
    case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
    case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
    case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
    case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
    case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
    case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
    case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
    case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
    case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
    case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
    case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
    case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
    case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
    case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
    case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
    case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
    case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
    case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
    case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
    case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
    case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
      // Default for MAI intrinsics.
      // srcC can also be an immediate which can be folded later.
      // FIXME: Should we eventually add an alternative mapping with AGPR src
      // for srcA/srcB?
      //
      // vdst, srcA, srcB, srcC
      const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
      OpdsMapping[0] =
          Info->mayNeedAGPRs()