84#include "llvm/IR/IntrinsicsAMDGPU.h"
86#define GET_TARGET_REGBANK_IMPL
87#include "AMDGPUGenRegisterBank.inc"
90#include "AMDGPUGenRegisterBankInfo.def"
93using namespace MIPatternMatch;
109 :
B(B), RBI(RBI_),
MRI(MRI_), NewBank(RB) {
110 assert(!B.isObservingChanges());
111 B.setChangeObserver(*
this);
114 ~ApplyRegBankMapping()
override {
118 B.stopObservingChanges();
123 const unsigned Opc =
MI.getOpcode();
124 if (Opc == AMDGPU::G_ANYEXT || Opc == AMDGPU::G_ZEXT ||
125 Opc == AMDGPU::G_SEXT) {
132 if (SrcBank == &AMDGPU::VCCRegBank) {
136 assert(NewBank == &AMDGPU::VGPRRegBank);
140 B.setInsertPt(*
MI.getParent(),
MI);
142 auto True = B.buildConstant(
S32, Opc == AMDGPU::G_SEXT ? -1 : 1);
143 auto False = B.buildConstant(
S32, 0);
144 B.buildSelect(DstReg, SrcReg, True, False);
145 MRI.setRegBank(True.getReg(0), *NewBank);
146 MRI.setRegBank(False.getReg(0), *NewBank);
147 MI.eraseFromParent();
150 assert(!
MRI.getRegClassOrRegBank(DstReg));
151 MRI.setRegBank(DstReg, *NewBank);
156 if (Opc == AMDGPU::G_TRUNC) {
159 assert(DstBank != &AMDGPU::VCCRegBank);
169 if (Reg.isPhysical() ||
MRI.getRegClassOrRegBank(Reg))
174 assert(NewBank == &AMDGPU::VGPRRegBank &&
175 "s1 operands should only be used for vector bools");
176 assert((
MI.getOpcode() != AMDGPU::G_TRUNC &&
177 MI.getOpcode() != AMDGPU::G_ANYEXT) &&
178 "not expecting legalization artifacts here");
179 RB = &AMDGPU::VCCRegBank;
182 MRI.setRegBank(Reg, *RB);
205 : Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
206 TII(Subtarget.getInstrInfo()) {
211 static auto InitializeRegisterBankOnce = [
this]() {
213 &
getRegBank(AMDGPU::VGPRRegBankID) == &AMDGPU::VGPRRegBank &&
214 &
getRegBank(AMDGPU::AGPRRegBankID) == &AMDGPU::AGPRRegBank);
218 llvm::call_once(InitializeRegisterBankFlag, InitializeRegisterBankOnce);
222 unsigned BankID = Bank.
getID();
223 return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
227 return RB != &AMDGPU::SGPRRegBank;
234 if (Dst.getID() == AMDGPU::SGPRRegBankID &&
236 return std::numeric_limits<unsigned>::max();
247 (Dst.getID() == AMDGPU::SGPRRegBankID) &&
249 Src.getID() == AMDGPU::SGPRRegBankID ||
250 Src.getID() == AMDGPU::VCCRegBankID))
251 return std::numeric_limits<unsigned>::max();
254 if (Dst.getID() == AMDGPU::AGPRRegBankID &&
255 Src.getID() == AMDGPU::AGPRRegBankID)
289 if (&RC == &AMDGPU::SReg_1RegClass)
290 return AMDGPU::VCCRegBank;
299 return AMDGPU::SGPRRegBank;
301 return Ty ==
LLT::scalar(1) ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
304 return TRI->
isAGPRClass(&RC) ? AMDGPU::AGPRRegBank : AMDGPU::VGPRRegBank;
307template <
unsigned NumOps>
311 const std::array<unsigned, NumOps> RegSrcOpIdx,
318 unsigned Sizes[NumOps];
319 for (
unsigned I = 0;
I < NumOps; ++
I) {
320 Register Reg =
MI.getOperand(RegSrcOpIdx[
I]).getReg();
324 for (
unsigned I = 0, E =
MI.getNumExplicitDefs();
I != E; ++
I) {
326 Operands[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SizeI);
330 unsigned MappingID = 2;
331 for (
const auto &Entry : Table) {
332 for (
unsigned I = 0;
I < NumOps; ++
I) {
333 int OpIdx = RegSrcOpIdx[
I];
334 Operands[OpIdx] = AMDGPU::getValueMapping(Entry.RegBanks[
I],
Sizes[
I]);
349 case Intrinsic::amdgcn_readlane: {
352 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
355 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
358 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
359 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
361 case Intrinsic::amdgcn_writelane: {
364 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
367 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
370 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 },
373 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 3 }
377 const std::array<unsigned, 4> RegSrcOpIdx = { { 0, 2, 3, 4 } };
378 return addMappingFromTable<4>(
MI,
MRI, RegSrcOpIdx, Table);
390 case Intrinsic::amdgcn_s_buffer_load: {
393 { { AMDGPU::SGPRRegBankID, AMDGPU::SGPRRegBankID }, 1 },
396 { { AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 300 },
399 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID }, 1000 },
402 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 1500 }
406 const std::array<unsigned, 2> RegSrcOpIdx = { { 2, 3 } };
407 return addMappingFromTable<2>(
MI,
MRI, RegSrcOpIdx, Table);
409 case Intrinsic::amdgcn_ds_ordered_add:
410 case Intrinsic::amdgcn_ds_ordered_swap: {
414 { { AMDGPU::VGPRRegBankID, AMDGPU::SGPRRegBankID, AMDGPU::VGPRRegBankID }, 1 },
417 { { AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID, AMDGPU::VGPRRegBankID }, 2 }
420 const std::array<unsigned, 3> RegSrcOpIdx = { { 0, 2, 3 } };
421 return addMappingFromTable<3>(
MI,
MRI, RegSrcOpIdx, Table);
423 case Intrinsic::amdgcn_s_sendmsg:
424 case Intrinsic::amdgcn_s_sendmsghalt: {
428 { { AMDGPU::SGPRRegBankID }, 1 },
431 { { AMDGPU::VGPRRegBankID }, 3 }
434 const std::array<unsigned, 1> RegSrcOpIdx = { { 2 } };
435 return addMappingFromTable<1>(
MI,
MRI, RegSrcOpIdx, Table);
445 if (!
MI.hasOneMemOperand())
478 switch (
MI.getOpcode()) {
479 case TargetOpcode::G_CONSTANT:
480 case TargetOpcode::G_IMPLICIT_DEF: {
484 { { AMDGPU::VGPRRegBankID }, 1 },
485 { { AMDGPU::SGPRRegBankID }, 1 },
486 { { AMDGPU::VCCRegBankID }, 1 }
489 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
494 case TargetOpcode::G_FCONSTANT:
495 case TargetOpcode::G_FRAME_INDEX:
496 case TargetOpcode::G_GLOBAL_VALUE: {
498 { { AMDGPU::VGPRRegBankID }, 1 },
499 { { AMDGPU::SGPRRegBankID }, 1 }
502 return addMappingFromTable<1>(
MI,
MRI, {{ 0 }}, Table);
504 case TargetOpcode::G_AND:
505 case TargetOpcode::G_OR:
506 case TargetOpcode::G_XOR: {
513 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
514 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32),
515 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32)}),
521 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
522 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size),
523 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size)}),
534 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
535 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
536 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
542 {AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
543 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
544 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
549 case TargetOpcode::G_LOAD:
550 case TargetOpcode::G_ZEXTLOAD:
551 case TargetOpcode::G_SEXTLOAD: {
553 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
562 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
563 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize)}),
571 {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
572 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
585 case TargetOpcode::G_SELECT: {
589 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
590 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
591 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size)}),
597 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
598 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size),
599 AMDGPU::getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size)}),
605 case TargetOpcode::G_UADDE:
606 case TargetOpcode::G_USUBE:
607 case TargetOpcode::G_SADDE:
608 case TargetOpcode::G_SSUBE: {
612 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
613 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
614 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
615 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size),
616 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1)}),
622 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
623 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
624 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size),
625 AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1)}),
630 case AMDGPU::G_BRCOND: {
631 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
636 {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1),
nullptr}),
642 {AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1),
nullptr }),
647 case AMDGPU::G_INTRINSIC:
648 case AMDGPU::G_INTRINSIC_CONVERGENT:
650 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
651 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
666 Register LoLHS =
MRI->createGenericVirtualRegister(HalfTy);
667 Register HiLHS =
MRI->createGenericVirtualRegister(HalfTy);
669 MRI->setRegBank(LoLHS, *Bank);
670 MRI->setRegBank(HiLHS, *Bank);
675 B.buildInstr(AMDGPU::G_UNMERGE_VALUES)
686 MRI.setType(Reg, NewTy);
706 LLT Ty =
MRI.getType(Src);
709 if (Bank == &AMDGPU::SGPRRegBank)
715 if (Bank != &AMDGPU::VGPRRegBank) {
717 Src =
B.buildCopy(Ty, Src).getReg(0);
718 MRI.setRegBank(Src, AMDGPU::VGPRRegBank);
722 unsigned NumParts = Bits / 32;
729 auto Unmerge =
B.buildUnmerge(
S32, Src);
730 for (
unsigned i = 0; i < NumParts; ++i)
734 for (
unsigned i = 0; i < NumParts; ++i) {
736 Register DstPart =
MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
737 MRI.setType(DstPart, NumParts == 1 ? Ty :
S32);
742 assert(Constrained &&
"Failed to constrain readfirstlane src reg");
744 B.buildInstr(AMDGPU::V_READFIRSTLANE_B32, {DstPart}, {SrcPart});
752 Register Dst =
B.buildMergeLikeInstr(Ty, DstParts).getReg(0);
753 MRI.setRegBank(Dst, AMDGPU::SGPRRegBank);
786 const unsigned MovExecOpc =
788 const unsigned MovExecTermOpc =
792 AMDGPU::S_XOR_B32_term : AMDGPU::S_XOR_B64_term;
794 AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64;
796 AMDGPU::EXEC_LO : AMDGPU::EXEC;
799 const int OrigRangeSize = std::distance(
Range.begin(),
Range.end());
803 Register SaveExecReg =
MRI.createVirtualRegister(WaveRC);
804 Register InitSaveExecReg =
MRI.createVirtualRegister(WaveRC);
807 B.buildInstr(TargetOpcode::IMPLICIT_DEF)
808 .addDef(InitSaveExecReg);
810 Register PhiExec =
MRI.createVirtualRegister(WaveRC);
811 Register NewExec =
MRI.createVirtualRegister(WaveRC);
837 B.setInsertPt(*LoopBB, LoopBB->
end());
839 B.buildInstr(TargetOpcode::PHI)
841 .addReg(InitSaveExecReg)
856 auto NewEnd = BodyBB->
end();
863 assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
868 if (!SGPROperandRegs.
count(OldReg))
873 auto OldVal = WaterfalledRegMap.
find(OldReg);
874 if (OldVal != WaterfalledRegMap.
end()) {
875 Op.setReg(OldVal->second);
880 LLT OpTy =
MRI.getType(OpReg);
883 if (OpBank != &AMDGPU::VGPRRegBank) {
886 OpReg =
B.buildCopy(OpTy, OpReg).getReg(0);
887 MRI.setRegBank(OpReg, AMDGPU::VGPRRegBank);
895 bool Is64 = OpSize % 64 == 0;
896 unsigned PartSize = Is64 ? 64 : 32;
898 unsigned NumParts = OpSize / PartSize;
904 CurrentLaneParts.
push_back(CurrentLaneReg);
906 auto UnmergeOp =
B.buildUnmerge(PartTy, OpReg);
907 auto UnmergeCurrentLane =
B.buildUnmerge(PartTy, CurrentLaneReg);
908 for (
unsigned i = 0; i < NumParts; ++i) {
910 CurrentLaneParts.
push_back(UnmergeCurrentLane.getReg(i));
911 MRI.setRegBank(OpParts[i], AMDGPU::VGPRRegBank);
912 MRI.setRegBank(CurrentLaneParts[i], AMDGPU::SGPRRegBank);
916 for (
unsigned i = 0; i < NumParts; ++i) {
918 OpParts[i]).getReg(0);
919 MRI.setRegBank(CmpReg, AMDGPU::VCCRegBank);
924 CondReg =
B.buildAnd(
S1, CondReg, CmpReg).getReg(0);
925 MRI.setRegBank(CondReg, AMDGPU::VCCRegBank);
929 Op.setReg(CurrentLaneReg);
932 WaterfalledRegMap.
insert(std::pair(OldReg,
Op.getReg()));
937 CondReg =
B.buildIntrinsic(Intrinsic::amdgcn_ballot,
941 MRI.setRegClass(CondReg, WaveRC);
944 B.buildInstr(AndSaveExecOpc)
948 MRI.setSimpleHint(NewExec, CondReg);
950 B.setInsertPt(*BodyBB, BodyBB->
end());
953 B.buildInstr(XorTermOpc)
962 B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
969 B.setMBB(*RestoreExecBB);
970 B.buildInstr(MovExecTermOpc)
972 .addReg(SaveExecReg);
976 B.setInsertPt(*RemainderBB, RemainderBB->
begin());
988 for (
unsigned Op : OpIndices) {
992 if (OpBank->
getID() != AMDGPU::SGPRRegBankID)
993 SGPROperandRegs.
insert(Reg);
997 return !SGPROperandRegs.
empty();
1017 Register Reg =
MI.getOperand(OpIdx).getReg();
1020 if (Bank == &AMDGPU::SGPRRegBank)
1024 MI.getOperand(OpIdx).setReg(Reg);
1036 assert(FirstSize % EltSize == 0);
1038 unsigned FirstPartNumElts = FirstSize / EltSize;
1039 unsigned RemainderElts = (TotalSize - FirstSize) / EltSize;
1060 const LLT LoadTy =
MRI.getType(DstReg);
1063 const unsigned MaxNonSmrdLoadSize = 128;
1067 if (DstBank == &AMDGPU::SGPRRegBank) {
1078 if (LoadSize == 32 &&
1082 if (LoadSize == 32 &&
1091 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
1093 if (LoadSize == 32) {
1097 if (
MI.getOpcode() == AMDGPU::G_SEXTLOAD) {
1099 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1100 B.buildSExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1101 }
else if (
MI.getOpcode() == AMDGPU::G_ZEXTLOAD) {
1103 auto WideLoad =
B.buildLoadFromOffset(
S32, PtrReg, *MMO, 0);
1104 B.buildZExtInReg(
MI.getOperand(0), WideLoad, MemSize);
1107 B.buildLoadFromOffset(
MI.getOperand(0), PtrReg, *MMO, 0);
1121 auto WideLoad =
B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
1123 B.buildTrunc(
MI.getOperand(0), WideLoad);
1125 B.buildDeleteTrailingVectorElements(
MI.getOperand(0).getReg(),
1130 MI.eraseFromParent();
1135 if (LoadSize <= MaxNonSmrdLoadSize)
1141 if (SrcRegs.
empty())
1147 LLT PtrTy =
MRI.getType(
MI.getOperand(1).getReg());
1148 MRI.setType(BasePtrReg, PtrTy);
1154 assert(LoadSize % MaxNonSmrdLoadSize == 0);
1155 unsigned NumSplitParts = LoadTy.
getSizeInBits() / MaxNonSmrdLoadSize;
1156 const LLT LoadSplitTy = LoadTy.
divide(NumSplitParts);
1157 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1169 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
1180 const auto &TFI = *ST.getFrameLowering();
1188 Register AllocSize =
MI.getOperand(1).getReg();
1194 if (SizeBank != &AMDGPU::SGPRRegBank)
1197 LLT PtrTy =
MRI.getType(Dst);
1202 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1204 auto WaveSize =
B.buildConstant(
LLT::scalar(32), ST.getWavefrontSizeLog2());
1205 auto ScaledSize =
B.buildShl(IntPtrTy, AllocSize, WaveSize);
1207 auto SPCopy =
B.buildCopy(PtrTy, SPReg);
1208 if (Alignment > TFI.getStackAlign()) {
1209 auto PtrAdd =
B.buildPtrAdd(PtrTy, SPCopy, ScaledSize);
1210 B.buildMaskLowPtrBits(Dst, PtrAdd,
1211 Log2(Alignment) + ST.getWavefrontSizeLog2());
1213 B.buildPtrAdd(Dst, SPCopy, ScaledSize);
1216 MI.eraseFromParent();
1223 int RsrcIdx)
const {
1224 const int NumDefs =
MI.getNumExplicitDefs();
1228 RsrcIdx += NumDefs + 1;
1235 for (
int I = NumDefs, NumOps =
MI.getNumOperands();
I != NumOps; ++
I) {
1236 if (!
MI.getOperand(
I).isReg())
1240 if (
I == RsrcIdx ||
I == RsrcIdx + 1)
1252 Register &SOffsetReg, int64_t &InstOffsetVal,
Align Alignment)
const {
1256 if (std::optional<int64_t> Imm =
1260 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1261 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1262 InstOffsetVal = ImmOffset;
1264 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1265 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1266 return SOffset + ImmOffset;
1281 SOffsetReg =
B.buildConstant(
S32, SOffset).getReg(0);
1282 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1283 InstOffsetVal = ImmOffset;
1289 VOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1290 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1292 InstOffsetVal = ImmOffset;
1306 if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
1312 if (Src0Bank == &AMDGPU::SGPRRegBank && Src1Bank == &AMDGPU::VGPRRegBank) {
1322 VOffsetReg = CombinedOffset;
1324 VOffsetReg =
B.buildCopy(
S32, CombinedOffset).getReg(0);
1325 B.getMRI()->setRegBank(VOffsetReg, AMDGPU::VGPRRegBank);
1328 SOffsetReg =
B.buildConstant(
S32, 0).getReg(0);
1329 B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
1340 LLT Ty =
MRI.getType(Dst);
1346 if (RSrcBank == &AMDGPU::SGPRRegBank &&
1347 OffsetBank == &AMDGPU::SGPRRegBank)
1355 if (LoadSize == 256 || LoadSize == 512) {
1356 NumLoads = LoadSize / 128;
1357 Ty = Ty.
divide(NumLoads);
1362 const Align Alignment = NumLoads > 1 ?
Align(16 * NumLoads) :
Align(1);
1368 int64_t ImmOffset = 0;
1371 SOffset, ImmOffset, Alignment);
1376 const Align MemAlign(4);
1390 B.getMRI()->setRegBank(VIndex, AMDGPU::VGPRRegBank);
1397 for (
int i = 0; i < NumLoads; ++i) {
1398 if (NumLoads == 1) {
1401 LoadParts[i] =
MRI.createGenericVirtualRegister(Ty);
1402 MRI.setRegBank(LoadParts[i], AMDGPU::VGPRRegBank);
1409 B.buildInstr(AMDGPU::G_AMDGPU_BUFFER_LOAD)
1410 .addDef(LoadParts[i])
1415 .addImm(ImmOffset + 16 * i)
1418 .addMemOperand(MMO);
1424 if (RSrcBank != &AMDGPU::SGPRRegBank) {
1427 B.setInstr(*Span.
begin());
1428 MI.eraseFromParent();
1432 OpsToWaterfall.
insert(RSrc);
1437 if (NumLoads != 1) {
1439 B.buildConcatVectors(Dst, LoadParts);
1441 B.buildMergeLikeInstr(Dst, LoadParts);
1445 if (RSrcBank == &AMDGPU::SGPRRegBank)
1446 MI.eraseFromParent();
1461 LLT Ty =
MRI.getType(DstReg);
1465 unsigned FirstOpnd = isa<GIntrinsic>(
MI) ? 2 : 1;
1466 Register SrcReg =
MI.getOperand(FirstOpnd).getReg();
1467 Register OffsetReg =
MI.getOperand(FirstOpnd + 1).getReg();
1468 Register WidthReg =
MI.getOperand(FirstOpnd + 2).getReg();
1472 if (DstBank == &AMDGPU::VGPRRegBank) {
1478 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
1482 auto ShiftOffset =
Signed ?
B.buildAShr(
S64, SrcReg, OffsetReg)
1483 :
B.buildLShr(
S64, SrcReg, OffsetReg);
1484 auto UnmergeSOffset =
B.buildUnmerge({
S32,
S32}, ShiftOffset);
1491 auto Zero =
B.buildConstant(
S32, 0);
1492 auto WidthImm = ConstWidth->Value.getZExtValue();
1493 if (WidthImm <= 32) {
1497 Signed ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg)
1498 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(0), Zero, WidthReg);
1500 Signed ?
B.buildAShr(
S32, Extract,
B.buildConstant(
S32, 31)) : Zero;
1501 B.buildMergeLikeInstr(DstReg, {Extract, Extend});
1505 auto UpperWidth =
B.buildConstant(
S32, WidthImm - 32);
1508 ?
B.buildSbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth)
1509 :
B.buildUbfx(
S32, UnmergeSOffset.getReg(1), Zero, UpperWidth);
1510 B.buildMergeLikeInstr(DstReg, {UnmergeSOffset.getReg(0), Extract});
1512 MI.eraseFromParent();
1518 auto ExtShift =
B.buildSub(
S32,
B.buildConstant(
S32, 64), WidthReg);
1519 auto SignBit =
B.buildShl(
S64, ShiftOffset, ExtShift);
1521 B.buildAShr(
S64, SignBit, ExtShift);
1523 B.buildLShr(
S64, SignBit, ExtShift);
1524 MI.eraseFromParent();
1530 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
1533 auto OffsetMask =
B.buildConstant(
S32, maskTrailingOnes<unsigned>(6));
1534 auto ClampOffset =
B.buildAnd(
S32, OffsetReg, OffsetMask);
1537 auto ShiftWidth =
B.buildShl(
S32, WidthReg,
B.buildConstant(
S32, 16));
1542 auto MergedInputs =
B.buildOr(
S32, ClampOffset, ShiftWidth);
1546 unsigned Opc = Ty ==
S32 ? (
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32) :
1547 (
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64);
1549 auto MIB =
B.buildInstr(Opc, {DstReg}, {SrcReg, MergedInputs});
1553 MI.eraseFromParent();
1571 if (
MRI.getRegBankOrNull(Src0) == &AMDGPU::VGPRRegBank)
1574 bool IsUnsigned =
MI.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
1578 bool DstOnValu =
MRI.getRegBankOrNull(Src2) == &AMDGPU::VGPRRegBank;
1579 bool Accumulate =
true;
1588 Register DstLo =
B.buildMul(
S32, Src0, Src1).getReg(0);
1589 bool MulHiInVgpr =
false;
1591 MRI.setRegBank(DstLo, AMDGPU::SGPRRegBank);
1594 DstHi = IsUnsigned ?
B.buildUMulH(
S32, Src0, Src1).getReg(0)
1595 :
B.buildSMulH(
S32, Src0, Src1).getReg(0);
1596 MRI.setRegBank(DstHi, AMDGPU::SGPRRegBank);
1601 MRI.setRegBank(VSrc0, AMDGPU::VGPRRegBank);
1602 MRI.setRegBank(VSrc1, AMDGPU::VGPRRegBank);
1604 DstHi = IsUnsigned ?
B.buildUMulH(
S32, VSrc0, VSrc1).getReg(0)
1605 :
B.buildSMulH(
S32, VSrc0, VSrc1).getReg(0);
1606 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1622 LLT CarryType = DstOnValu ?
S1 :
S32;
1624 DstOnValu ? AMDGPU::VCCRegBank : AMDGPU::SGPRRegBank;
1626 DstOnValu ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank;
1631 Zero =
B.buildConstant(
S32, 0).getReg(0);
1632 MRI.setRegBank(Zero,
1633 MulHiInVgpr ? AMDGPU::VGPRRegBank : AMDGPU::SGPRRegBank);
1637 MRI.setRegBank(Carry, MulHiInVgpr ? AMDGPU::VCCRegBank
1638 : AMDGPU::SGPRRegBank);
1640 if (DstOnValu && !MulHiInVgpr) {
1641 Carry =
B.buildTrunc(
S1, Carry).getReg(0);
1642 MRI.setRegBank(Carry, AMDGPU::VCCRegBank);
1648 DstLo =
B.buildCopy(
S32, DstLo).getReg(0);
1649 DstHi =
B.buildCopy(
S32, DstHi).getReg(0);
1650 MRI.setRegBank(DstLo, AMDGPU::VGPRRegBank);
1651 MRI.setRegBank(DstHi, AMDGPU::VGPRRegBank);
1654 auto Unmerge =
B.buildUnmerge(
S32, Src2);
1655 Register Src2Lo = Unmerge.getReg(0);
1656 Register Src2Hi = Unmerge.getReg(1);
1657 MRI.setRegBank(Src2Lo, DstBank);
1658 MRI.setRegBank(Src2Hi, DstBank);
1662 MRI.setRegBank(Src2Sign.getReg(0), CarryBank);
1664 Carry =
B.buildXor(CarryType, Carry, Src2Sign).getReg(0);
1665 MRI.setRegBank(Carry, CarryBank);
1668 auto AddLo =
B.buildUAddo(
S32, CarryType, DstLo, Src2Lo);
1669 DstLo = AddLo.getReg(0);
1670 Register CarryLo = AddLo.getReg(1);
1671 MRI.setRegBank(DstLo, DstBank);
1672 MRI.setRegBank(CarryLo, CarryBank);
1674 auto AddHi =
B.buildUAdde(
S32, CarryType, DstHi, Src2Hi, CarryLo);
1675 DstHi = AddHi.getReg(0);
1676 MRI.setRegBank(DstHi, DstBank);
1678 Register CarryHi = AddHi.getReg(1);
1679 MRI.setRegBank(CarryHi, CarryBank);
1684 Carry =
B.buildXor(CarryType, Carry, CarryHi).getReg(0);
1685 MRI.setRegBank(Carry, CarryBank);
1689 Carry =
B.buildConstant(CarryType, 0).getReg(0);
1690 MRI.setRegBank(Carry, CarryBank);
1694 B.buildMergeLikeInstr(Dst0, {DstLo, DstHi});
1697 B.buildCopy(Dst1, Carry);
1699 B.buildTrunc(Dst1, Carry);
1702 MI.eraseFromParent();
1709 case TargetOpcode::G_ASHR:
1710 case TargetOpcode::G_SMIN:
1711 case TargetOpcode::G_SMAX:
1712 return TargetOpcode::G_SEXT;
1713 case TargetOpcode::G_LSHR:
1714 case TargetOpcode::G_UMIN:
1715 case TargetOpcode::G_UMAX:
1716 return TargetOpcode::G_ZEXT;
1718 return TargetOpcode::G_ANYEXT;
1724static std::pair<Register, Register>
1727 auto Bitcast =
B.buildBitcast(
S32, Src);
1729 if (ExtOpcode == TargetOpcode::G_SEXT) {
1730 auto ExtLo =
B.buildSExtInReg(
S32, Bitcast, 16);
1731 auto ShiftHi =
B.buildAShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1732 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1735 auto ShiftHi =
B.buildLShr(
S32, Bitcast,
B.buildConstant(
S32, 16));
1736 if (ExtOpcode == TargetOpcode::G_ZEXT) {
1737 auto ExtLo =
B.buildAnd(
S32, Bitcast,
B.buildConstant(
S32, 0xffff));
1738 return std::pair(ExtLo.getReg(0), ShiftHi.getReg(0));
1741 assert(ExtOpcode == TargetOpcode::G_ANYEXT);
1742 return std::pair(Bitcast.getReg(0), ShiftHi.getReg(0));
1750 if (!SrcReg.
empty()) {
1767 LLT StoreVT =
MRI.getType(Reg);
1771 auto Unmerge =
B.buildUnmerge(
S16, Reg);
1775 for (
int I = 0, E = Unmerge->getNumOperands() - 1;
I != E; ++
I)
1785static std::pair<Register, unsigned>
1789 return std::pair(
Register(), Const);
1793 return std::pair(
Base, Const);
1796 return std::pair(Reg, 0);
1799std::pair<Register, unsigned>
1812 if (ImmOffset != 0) {
1821 unsigned Overflow = ImmOffset & ~MaxImm;
1822 ImmOffset -= Overflow;
1823 if ((int32_t)Overflow < 0) {
1824 Overflow += ImmOffset;
1829 if (Overflow != 0) {
1831 BaseReg =
B.buildConstant(
S32, Overflow).getReg(0);
1833 auto OverflowVal =
B.buildConstant(
S32, Overflow);
1834 BaseReg =
B.buildAdd(
S32, BaseReg, OverflowVal).getReg(0);
1840 BaseReg =
B.buildConstant(
S32, 0).getReg(0);
1842 return {BaseReg, C1};
1848 LLT SrcTy =
MRI.getType(SrcReg);
1851 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1858 Register TmpReg0 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1859 Register TmpReg1 =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1861 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1863 .addUse(SrcReg, 0, AMDGPU::sub0);
1864 B.buildInstr(AMDGPU::V_MOV_B32_e32)
1866 .addUse(SrcReg, 0, AMDGPU::sub1);
1867 B.buildInstr(AMDGPU::REG_SEQUENCE)
1870 .addImm(AMDGPU::sub0)
1872 .addImm(AMDGPU::sub1);
1883 unsigned ConstOffset) {
1889 auto MaterializedOffset =
B.buildConstant(
S32, ConstOffset);
1891 auto Add =
B.buildAdd(
S32, WaterfallIdx, MaterializedOffset);
1892 MRI.setRegBank(MaterializedOffset.getReg(0), AMDGPU::SGPRRegBank);
1893 MRI.setRegBank(
Add.getReg(0), AMDGPU::SGPRRegBank);
1905 bool IsBooleanSrc =
false) {
1906 if (ExtOpc == AMDGPU::G_ZEXT) {
1907 B.buildConstant(Hi32Reg, 0);
1908 }
else if (ExtOpc == AMDGPU::G_SEXT) {
1912 B.buildCopy(Hi32Reg, Lo32Reg);
1916 B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
1917 B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
1920 assert(ExtOpc == AMDGPU::G_ANYEXT &&
"not an integer extension");
1921 B.buildUndef(Hi32Reg);
1925bool AMDGPURegisterBankInfo::foldExtractEltToCmpSelect(
1927 const OperandsMapper &OpdMapper)
const {
1934 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
1936 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
1938 LLT VecTy =
MRI.getType(VecReg);
1949 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
1951 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
1954 (DstBank == AMDGPU::SGPRRegBank &&
1955 SrcBank == AMDGPU::SGPRRegBank &&
1956 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
1957 : AMDGPU::VCCRegBank;
1960 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
1961 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
1962 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
1967 unsigned NumLanes = DstRegs.size();
1971 EltTy =
MRI.getType(DstRegs[0]);
1973 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
1975 for (
unsigned L = 0;
L < NumLanes; ++
L)
1976 Res[L] = UnmergeToEltTy.getReg(L);
1978 for (
unsigned I = 1;
I < NumElem; ++
I) {
1979 auto IC =
B.buildConstant(
S32,
I);
1980 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
1982 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
1984 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1985 auto S =
B.buildSelect(EltTy, Cmp,
1986 UnmergeToEltTy.getReg(
I * NumLanes + L), Res[L]);
1988 for (
unsigned N : { 0, 2, 3 })
1989 MRI.setRegBank(S->getOperand(
N).getReg(), DstBank);
1991 Res[
L] = S->getOperand(0).getReg();
1995 for (
unsigned L = 0;
L < NumLanes; ++
L) {
1996 Register DstReg = (NumLanes == 1) ?
MI.getOperand(0).getReg() : DstRegs[
L];
1997 B.buildCopy(DstReg, Res[L]);
1998 MRI.setRegBank(DstReg, DstBank);
2001 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2002 MI.eraseFromParent();
2013 if (CurrBank && *CurrBank != Bank) {
2014 Register Copy =
B.buildCopy(
MRI.getType(Reg), Reg).getReg(0);
2015 MRI.setRegBank(Copy, Bank);
2019 MRI.setRegBank(Reg, Bank);
2023bool AMDGPURegisterBankInfo::foldInsertEltToCmpSelect(
2025 const OperandsMapper &OpdMapper)
const {
2032 *OpdMapper.getInstrMapping().getOperandMapping(3).BreakDown[0].RegBank;
2034 bool IsDivergentIdx = IdxBank != AMDGPU::SGPRRegBank;
2036 LLT VecTy =
MRI.getType(VecReg);
2047 *OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
2049 *OpdMapper.getInstrMapping().getOperandMapping(1).BreakDown[0].RegBank;
2051 *OpdMapper.getInstrMapping().getOperandMapping(2).BreakDown[0].RegBank;
2054 (DstBank == AMDGPU::SGPRRegBank &&
2055 SrcBank == AMDGPU::SGPRRegBank &&
2056 InsBank == AMDGPU::SGPRRegBank &&
2057 IdxBank == AMDGPU::SGPRRegBank) ? AMDGPU::SGPRRegBank
2058 : AMDGPU::VCCRegBank;
2061 if (CCBank == AMDGPU::VCCRegBank && IdxBank == AMDGPU::SGPRRegBank) {
2062 Idx =
B.buildCopy(
S32,
Idx)->getOperand(0).getReg();
2063 MRI.setRegBank(
Idx, AMDGPU::VGPRRegBank);
2068 unsigned NumLanes = InsRegs.size();
2071 InsRegs.push_back(
MI.getOperand(2).getReg());
2073 EltTy =
MRI.getType(InsRegs[0]);
2076 auto UnmergeToEltTy =
B.buildUnmerge(EltTy, VecReg);
2079 for (
unsigned I = 0;
I < NumElem; ++
I) {
2080 auto IC =
B.buildConstant(
S32,
I);
2081 MRI.setRegBank(IC->getOperand(0).getReg(), AMDGPU::SGPRRegBank);
2083 MRI.setRegBank(
Cmp->getOperand(0).getReg(), CCBank);
2085 for (
unsigned L = 0;
L < NumLanes; ++
L) {
2087 Register Op1 = UnmergeToEltTy.getReg(
I * NumLanes + L);
2098 if (MergeTy ==
MRI.getType(
MI.getOperand(0).getReg())) {
2099 B.buildBuildVector(
MI.getOperand(0), Ops);
2101 auto Vec =
B.buildBuildVector(MergeTy, Ops);
2102 MRI.setRegBank(Vec->getOperand(0).getReg(), DstBank);
2103 B.buildBitcast(
MI.getOperand(0).getReg(), Vec);
2106 MRI.setRegBank(
MI.getOperand(0).getReg(), DstBank);
2107 MI.eraseFromParent();
2120 if (DefRegs.
empty()) {
2128 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2139 if (Src0Regs.
empty())
2144 if (Src1Regs.
empty())
2167 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2169 Register Hi =
B.buildUMulH(HalfTy, Src0Regs[0], Src1Regs[0]).getReg(0);
2170 Register MulLoHi =
B.buildMul(HalfTy, Src0Regs[0], Src1Regs[1]).getReg(0);
2172 Register MulHiLo =
B.buildMul(HalfTy, Src0Regs[1], Src1Regs[0]).getReg(0);
2173 B.buildAdd(DefRegs[1],
Add, MulHiLo);
2174 B.buildMul(DefRegs[0], Src0Regs[0], Src1Regs[0]);
2176 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2177 MI.eraseFromParent();
2183 B.setInstrAndDebugLoc(
MI);
2184 unsigned Opc =
MI.getOpcode();
2187 case AMDGPU::G_CONSTANT:
2188 case AMDGPU::G_IMPLICIT_DEF: {
2190 LLT DstTy =
MRI.getType(DstReg);
2196 if (DstBank == &AMDGPU::VCCRegBank)
2199 if (DefRegs.
empty())
2202 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2205 LLVMContext &Ctx =
B.getMF().getFunction().getContext();
2207 MI.getOperand(0).setReg(NewDstReg);
2208 if (Opc != AMDGPU::G_IMPLICIT_DEF) {
2209 uint64_t ConstVal =
MI.getOperand(1).getCImm()->getZExtValue();
2210 MI.getOperand(1).setCImm(
2214 MRI.setRegBank(NewDstReg, *DstBank);
2215 B.buildTrunc(DefRegs[0], NewDstReg);
2218 case AMDGPU::G_PHI: {
2220 LLT DstTy =
MRI.getType(DstReg);
2227 if (DstBank == &AMDGPU::VCCRegBank) {
2234 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
2238 if (SrcBank != &AMDGPU::VCCRegBank) {
2243 MRI.setRegBank(Copy.getReg(0), AMDGPU::VCCRegBank);
2244 MI.getOperand(
I).setReg(Copy.getReg(0));
2255 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2256 B.setInsertPt(
B.getMBB(),
MI);
2264 case AMDGPU::G_FCMP:
2268 case AMDGPU::G_ICMP:
2269 case AMDGPU::G_UADDO:
2270 case AMDGPU::G_USUBO:
2271 case AMDGPU::G_UADDE:
2272 case AMDGPU::G_SADDE:
2273 case AMDGPU::G_USUBE:
2274 case AMDGPU::G_SSUBE: {
2275 unsigned BoolDstOp =
2276 (Opc == AMDGPU::G_ICMP || Opc == AMDGPU::G_FCMP) ? 0 : 1;
2277 Register DstReg =
MI.getOperand(BoolDstOp).getReg();
2281 if (DstBank != &AMDGPU::SGPRRegBank)
2284 const bool HasCarryIn =
MI.getNumOperands() == 5;
2290 MRI.setRegBank(NewDstReg, AMDGPU::SGPRRegBank);
2291 MI.getOperand(BoolDstOp).setReg(NewDstReg);
2295 MRI.setRegBank(NewSrcReg, AMDGPU::SGPRRegBank);
2296 B.buildZExt(NewSrcReg,
MI.getOperand(4).getReg());
2297 MI.getOperand(4).setReg(NewSrcReg);
2301 B.setInsertPt(*
MBB, std::next(
MI.getIterator()));
2306 if (DefRegs.
empty())
2308 B.buildTrunc(DefRegs[0], NewDstReg);
2311 case AMDGPU::G_SELECT: {
2313 LLT DstTy =
MRI.getType(DstReg);
2316 if (CondRegs.
empty())
2323 if (CondBank == &AMDGPU::SGPRRegBank) {
2326 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2328 MI.getOperand(1).setReg(NewCondReg);
2329 B.buildZExt(NewCondReg, CondRegs[0]);
2342 if (DefRegs.
empty()) {
2347 if (Src1Regs.
empty())
2353 if (Src2Regs.
empty())
2360 B.buildSelect(DefRegs[0], CondRegs[0], Src1Regs[0], Src2Regs[0]);
2361 B.buildSelect(DefRegs[1], CondRegs[0], Src1Regs[1], Src2Regs[1]);
2363 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2364 MI.eraseFromParent();
2367 case AMDGPU::G_BRCOND: {
2368 Register CondReg =
MI.getOperand(0).getReg();
2373 if (CondBank == &AMDGPU::SGPRRegBank) {
2376 MRI.setRegBank(NewCondReg, AMDGPU::SGPRRegBank);
2378 MI.getOperand(0).setReg(NewCondReg);
2379 B.buildZExt(NewCondReg, CondReg);
2387 case AMDGPU::G_XOR: {
2391 LLT DstTy =
MRI.getType(DstReg);
2396 if (DstBank == &AMDGPU::VCCRegBank)
2400 ApplyRegBankMapping ApplyBank(
B, *
this,
MRI, DstBank);
2418 if (DefRegs.
empty()) {
2425 (Src0Regs.
empty() || Src0Regs.
size() == 2));
2431 if (Src0Regs.
empty())
2436 if (Src1Regs.
empty())
2443 B.buildInstr(Opc, {DefRegs[0]}, {Src0Regs[0], Src1Regs[0]});
2444 B.buildInstr(Opc, {DefRegs[1]}, {Src0Regs[1], Src1Regs[1]});
2446 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2447 MI.eraseFromParent();
2450 case AMDGPU::G_ABS: {
2456 if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) {
2458 ApplyRegBankMapping Apply(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2471 case AMDGPU::G_LSHR:
2472 case AMDGPU::G_ASHR:
2473 case AMDGPU::G_SMIN:
2474 case AMDGPU::G_SMAX:
2475 case AMDGPU::G_UMIN:
2476 case AMDGPU::G_UMAX: {
2478 LLT DstTy =
MRI.getType(DstReg);
2495 if (DstBank == &AMDGPU::VGPRRegBank)
2501 ApplyRegBankMapping ApplySALU(
B, *
this,
MRI, &AMDGPU::SGPRRegBank);
2503 if (DstTy.
isVector() && Opc == AMDGPU::G_ABS) {
2506 std::tie(WideSrcLo, WideSrcHi) =
2508 auto Lo =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcLo});
2509 auto Hi =
B.buildInstr(AMDGPU::G_ABS, {
S32}, {WideSrcHi});
2510 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2511 MI.eraseFromParent();
2520 std::tie(WideSrc0Lo, WideSrc0Hi)
2522 std::tie(WideSrc1Lo, WideSrc1Hi)
2524 auto Lo =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Lo, WideSrc1Lo});
2525 auto Hi =
B.buildInstr(
MI.getOpcode(), {S32}, {WideSrc0Hi, WideSrc1Hi});
2526 B.buildBuildVectorTrunc(DstReg, {
Lo.getReg(0),
Hi.getReg(0)});
2527 MI.eraseFromParent();
2535 if (Opc == AMDGPU::G_SHL || Opc == AMDGPU::G_LSHR ||
2536 Opc == AMDGPU::G_ASHR) {
2537 B.setInsertPt(*
MBB,
MI.getIterator());
2545 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
2546 case AMDGPU::G_AMDGPU_S_MUL_U64_U32: {
2560 Register SrcReg0 =
MI.getOperand(1).getReg();
2561 Register SrcReg1 =
MI.getOperand(2).getReg();
2564 assert(
MRI.getType(DstReg) ==
S64 &&
"This is a special case for s_mul_u64 "
2565 "that handles only 64-bit operands.");
2571 if (DstBank == &AMDGPU::SGPRRegBank) {
2572 MI.setDesc(
TII->get(AMDGPU::S_MUL_U64));
2573 MRI.setRegClass(DstReg, &AMDGPU::SGPR_64RegClass);
2574 MRI.setRegClass(SrcReg0, &AMDGPU::SGPR_64RegClass);
2575 MRI.setRegClass(SrcReg1, &AMDGPU::SGPR_64RegClass);
2581 assert(
MRI.getRegBankOrNull(DstReg) == &AMDGPU::VGPRRegBank &&
2582 "The destination operand should be in vector registers.");
2587 Register Op0L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2588 MRI.setRegClass(Op0L, &AMDGPU::VGPR_32RegClass);
2590 B.buildTrunc(Op0L, SrcReg0);
2593 Register Op1L =
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
2594 MRI.setRegClass(Op1L, &AMDGPU::VGPR_32RegClass);
2596 B.buildTrunc(Op1L, SrcReg1);
2598 unsigned NewOpc = Opc == AMDGPU::G_AMDGPU_S_MUL_U64_U32
2599 ? AMDGPU::G_AMDGPU_MAD_U64_U32
2600 : AMDGPU::G_AMDGPU_MAD_I64_I32;
2604 MRI.setRegClass(Zero64, &AMDGPU::VReg_64RegClass);
2605 Register CarryOut =
MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
2606 MRI.setRegClass(CarryOut, &AMDGPU::VReg_64RegClass);
2607 B.buildInstr(NewOpc, {DstReg, CarryOut}, {Op0L, Op1L, Zero64});
2608 MI.eraseFromParent();
2611 case AMDGPU::G_SEXT_INREG: {
2613 if (SrcRegs.
empty())
2617 ApplyRegBankMapping O(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2624 int Amt =
MI.getOperand(2).getImm();
2630 B.buildFreeze(DstRegs[0], SrcRegs[0]);
2632 auto Freeze =
B.buildFreeze(
S32, SrcRegs[0]);
2634 B.buildSExtInReg(DstRegs[0], Freeze, Amt);
2637 B.buildAShr(DstRegs[1], DstRegs[0],
B.buildConstant(
S32, 31));
2641 B.buildCopy(DstRegs[0], SrcRegs[0]);
2642 B.buildSExtInReg(DstRegs[1], DstRegs[0], Amt - 32);
2646 MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
2647 MI.eraseFromParent();
2650 case AMDGPU::G_CTPOP:
2651 case AMDGPU::G_BITREVERSE: {
2654 if (DstBank == &AMDGPU::SGPRRegBank)
2659 LLT Ty =
MRI.getType(SrcReg);
2663 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2672 case AMDGPU::G_AMDGPU_FFBH_U32:
2673 case AMDGPU::G_AMDGPU_FFBL_B32:
2674 case AMDGPU::G_CTLZ_ZERO_UNDEF:
2675 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
2678 if (DstBank == &AMDGPU::SGPRRegBank)
2683 LLT Ty =
MRI.getType(SrcReg);
2693 ApplyRegBankMapping ApplyVALU(
B, *
this,
MRI, &AMDGPU::VGPRRegBank);
2695 unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
2696 ? (
unsigned)AMDGPU::G_AMDGPU_FFBH_U32
2697 : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2698 ? (
unsigned)AMDGPU::G_AMDGPU_FFBL_B32
2700 unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
2701 auto X =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx]});
2702 auto Y =
B.buildInstr(NewOpc, {
S32}, {SrcRegs[
Idx ^ 1]});
2704 Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
2706 : AMDGPU::G_UADDSAT;
2707 Y =
B.buildInstr(AddOpc, {
S32}, {
Y,
B.buildConstant(
S32, 32)});
2709 B.buildUMin(DstReg,
X,
Y);
2710 MI.eraseFromParent();
2713 case AMDGPU::G_SEXT:
2714 case AMDGPU::G_ZEXT:
2715 case AMDGPU::G_ANYEXT: {
2717 LLT SrcTy =
MRI.getType(SrcReg);
2718 const bool Signed = Opc == AMDGPU::G_SEXT;
2726 LLT DstTy =
MRI.getType(DstReg);
2728 SrcBank != &AMDGPU::SGPRRegBank &&
2729 SrcBank != &AMDGPU::VCCRegBank &&
2739 B.buildSExtOrTrunc(DefRegs[0], SrcReg);
2740 }
else if (Opc == AMDGPU::G_ZEXT) {
2741 B.buildZExtOrTrunc(DefRegs[0], SrcReg);
2743 B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
2747 MRI.setRegBank(DstReg, *SrcBank);
2748 MI.eraseFromParent();
2758 if (SrcBank == &AMDGPU::VCCRegBank) {
2765 const bool UseSel64 = DstSize > 32 &&
2766 SrcBank->
getID() == AMDGPU::SGPRRegBankID;
2770 auto True =
B.buildConstant(SelType,
Signed ? -1 : 1);
2771 auto False =
B.buildConstant(SelType, 0);
2773 MRI.setRegBank(True.getReg(0), *DstBank);
2774 MRI.setRegBank(False.getReg(0), *DstBank);
2775 MRI.setRegBank(DstReg, *DstBank);
2778 B.buildSelect(DefRegs[0], SrcReg, True, False);
2780 }
else if (DstSize < 32) {
2781 auto Sel =
B.buildSelect(SelType, SrcReg, True, False);
2782 MRI.setRegBank(Sel.getReg(0), *DstBank);
2783 B.buildTrunc(DstReg, Sel);
2785 B.buildSelect(DstReg, SrcReg, True, False);
2788 MI.eraseFromParent();
2794 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
2803 LLT DstTy =
MRI.getType(DstReg);
2804 LLT SrcTy =
MRI.getType(SrcReg);
2806 if (foldExtractEltToCmpSelect(
B,
MI, OpdMapper))
2818 unsigned ConstOffset;
2819 std::tie(BaseIdxReg, ConstOffset) =
2826 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2831 if (ShouldMoveIndexIntoLoop)
2832 MI.getOperand(2).setReg(BaseIdxReg);
2838 const bool NeedCopyToVGPR = DstBank == &AMDGPU::VGPRRegBank &&
2839 SrcBank == &AMDGPU::SGPRRegBank;
2840 if (DstRegs.
empty()) {
2845 if (NeedCopyToVGPR) {
2847 Register TmpReg =
MRI.createGenericVirtualRegister(DstTy);
2848 MRI.setRegBank(TmpReg, AMDGPU::SGPRRegBank);
2849 MI.getOperand(0).setReg(TmpReg);
2850 B.setInsertPt(*
MI.getParent(), ++
MI.getIterator());
2857 if (ShouldMoveIndexIntoLoop)
2867 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2868 auto One =
B.buildConstant(
S32, 1);
2879 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2880 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2882 auto Extract0 =
B.buildExtractVectorElement(DstRegs[0], CastSrc, IdxLo);
2883 auto Extract1 =
B.buildExtractVectorElement(DstRegs[1], CastSrc, IdxHi);
2885 MRI.setRegBank(DstReg, *DstBank);
2886 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
2887 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
2888 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
2889 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
2893 MI.eraseFromParent();
2899 B.setInstr(*Span.
begin());
2900 MI.eraseFromParent();
2904 if (NeedCopyToVGPR) {
2908 MRI.setRegBank(TmpReg0, AMDGPU::SGPRRegBank);
2909 MRI.setRegBank(TmpReg1, AMDGPU::SGPRRegBank);
2911 Extract0->getOperand(0).setReg(TmpReg0);
2912 Extract1->getOperand(0).setReg(TmpReg1);
2920 if (ShouldMoveIndexIntoLoop)
2925 case AMDGPU::G_INSERT_VECTOR_ELT: {
2929 LLT VecTy =
MRI.getType(DstReg);
2935 MRI.setType(
MI.getOperand(1).getReg(), VecTy);
2937 if (foldInsertEltToCmpSelect(
B,
MI, OpdMapper))
2945 LLT InsTy =
MRI.getType(InsReg);
2949 unsigned ConstOffset;
2950 std::tie(BaseIdxReg, ConstOffset) =
2957 bool ShouldMoveIndexIntoLoop = IdxBank != &AMDGPU::SGPRRegBank &&
2962 if (ShouldMoveIndexIntoLoop)
2963 MI.getOperand(3).setReg(BaseIdxReg);
2966 if (InsRegs.
empty()) {
2970 if (ShouldMoveIndexIntoLoop) {
2982 auto CastSrc =
B.buildBitcast(Vec32, SrcReg);
2983 auto One =
B.buildConstant(
S32, 1);
2992 auto IdxLo =
B.buildShl(
S32, BaseIdxReg, One);
2993 auto IdxHi =
B.buildAdd(
S32, IdxLo, One);
2995 auto InsLo =
B.buildInsertVectorElement(Vec32, CastSrc, InsRegs[0], IdxLo);
2996 auto InsHi =
B.buildInsertVectorElement(Vec32, InsLo, InsRegs[1], IdxHi);
3005 MRI.setRegBank(InsReg, *InsSrcBank);
3006 MRI.setRegBank(CastSrc.getReg(0), *SrcBank);
3007 MRI.setRegBank(InsLo.getReg(0), *DstBank);
3008 MRI.setRegBank(InsHi.getReg(0), *DstBank);
3009 MRI.setRegBank(One.getReg(0), AMDGPU::SGPRRegBank);
3010 MRI.setRegBank(IdxLo.getReg(0), AMDGPU::SGPRRegBank);
3011 MRI.setRegBank(IdxHi.getReg(0), AMDGPU::SGPRRegBank);
3016 B.setInsertPt(
B.getMBB(),
MI);
3017 B.buildBitcast(DstReg, InsHi);
3018 MI.eraseFromParent();
3022 B.setInstr(*Span.
begin());
3023 MI.eraseFromParent();
3034 B.buildBitcast(DstReg, InsHi);
3037 if (ShouldMoveIndexIntoLoop)
3042 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
3043 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
3044 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
3045 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
3046 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
3047 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
3048 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
3049 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
3050 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
3051 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
3052 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
3053 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
3054 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
3055 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
3056 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
3057 case AMDGPU::G_AMDGPU_BUFFER_STORE:
3058 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
3059 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
3060 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
3061 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16:
3062 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
3063 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16: {
3068 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
3069 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
3070 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
3071 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
3072 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
3073 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
3074 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
3075 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
3076 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
3077 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
3078 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
3079 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC: {
3084 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
3085 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
3086 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
3091 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
3096 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
3097 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
3098 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
3099 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
3100 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
3104 case AMDGPU::G_INTRINSIC:
3105 case AMDGPU::G_INTRINSIC_CONVERGENT: {
3107 case Intrinsic::amdgcn_readlane: {
3118 case Intrinsic::amdgcn_writelane: {
3128 case Intrinsic::amdgcn_interp_p1:
3129 case Intrinsic::amdgcn_interp_p2:
3130 case Intrinsic::amdgcn_interp_mov:
3131 case Intrinsic::amdgcn_interp_p1_f16:
3132 case Intrinsic::amdgcn_interp_p2_f16:
3133 case Intrinsic::amdgcn_lds_param_load: {
3141 case Intrinsic::amdgcn_interp_inreg_p10:
3142 case Intrinsic::amdgcn_interp_inreg_p2:
3143 case Intrinsic::amdgcn_interp_inreg_p10_f16:
3144 case Intrinsic::amdgcn_interp_inreg_p2_f16:
3145 case Intrinsic::amdgcn_interp_p10_rtz_f16:
3146 case Intrinsic::amdgcn_interp_p2_rtz_f16:
3149 case Intrinsic::amdgcn_permlane16:
3150 case Intrinsic::amdgcn_permlanex16: {
3158 case Intrinsic::amdgcn_sbfe:
3161 case Intrinsic::amdgcn_ubfe:
3164 case Intrinsic::amdgcn_inverse_ballot:
3165 case Intrinsic::amdgcn_s_bitreplicate:
3166 case Intrinsic::amdgcn_s_quadmask:
3167 case Intrinsic::amdgcn_s_wqm:
3171 case Intrinsic::amdgcn_ballot:
3177 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
3178 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
3179 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
3180 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
3181 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
3191 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
3192 unsigned N =
MI.getNumExplicitOperands() - 2;
3197 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
3198 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
3199 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
3201 case Intrinsic::amdgcn_ds_ordered_add:
3202 case Intrinsic::amdgcn_ds_ordered_swap: {
3209 case Intrinsic::amdgcn_ds_gws_init:
3210 case Intrinsic::amdgcn_ds_gws_barrier:
3211 case Intrinsic::amdgcn_ds_gws_sema_br: {
3217 case Intrinsic::amdgcn_ds_gws_sema_v:
3218 case Intrinsic::amdgcn_ds_gws_sema_p:
3219 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
3224 case Intrinsic::amdgcn_ds_append:
3225 case Intrinsic::amdgcn_ds_consume: {
3229 case Intrinsic::amdgcn_s_sendmsg:
3230 case Intrinsic::amdgcn_s_sendmsghalt: {
3235 case Intrinsic::amdgcn_s_setreg: {
3239 case Intrinsic::amdgcn_s_ttracedata:
3242 case Intrinsic::amdgcn_raw_buffer_load_lds:
3243 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
3250 case Intrinsic::amdgcn_struct_buffer_load_lds:
3251 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
3258 case Intrinsic::amdgcn_global_load_lds: {
3263 case Intrinsic::amdgcn_lds_direct_load: {
3269 case Intrinsic::amdgcn_exp_row:
3273 case Intrinsic::amdgcn_s_sleep_var:
3277 case Intrinsic::amdgcn_s_barrier_signal_var:
3278 case Intrinsic::amdgcn_s_barrier_join:
3279 case Intrinsic::amdgcn_s_wakeup_barrier:
3282 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var:
3285 case Intrinsic::amdgcn_s_barrier_init:
3289 case Intrinsic::amdgcn_s_get_barrier_state: {
3299 if (RSrcIntrin->IsImage) {
3310 case AMDGPU::G_SI_CALL: {
3321 unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
3322 unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
3328 unsigned NonCopyInstrsLen = 0;
3334 while (Start->getOpcode() != FrameSetupOpcode) {
3336 bool IsCopy =
false;
3337 if (Start->getOpcode() == AMDGPU::COPY) {
3338 auto &Dst = Start->getOperand(0);
3341 if (Reg.isPhysical() &&
MI.readsRegister(Reg,
TRI)) {
3346 auto &Src = Start->getOperand(1);
3349 IsCopy =
Info->getScratchRSrcReg() == Reg;
3357 NonCopyInstrsLen = NonCopyInstrs.
size();
3362 NonCopyInstrs.
resize(NonCopyInstrsLen);
3364 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3370 NonCopyInstrs.
clear();
3371 NonCopyInstrsLen = 0;
3374 while (
End->getOpcode() != FrameDestroyOpcode) {
3376 bool IsCopy =
false;
3377 if (
End->getOpcode() == AMDGPU::COPY) {
3378 auto &Src =
End->getOperand(1);
3381 IsCopy = Reg.isPhysical() &&
MI.modifiesRegister(Reg,
TRI);
3387 NonCopyInstrsLen = NonCopyInstrs.
size();
3392 NonCopyInstrs.
resize(NonCopyInstrsLen);
3396 for (
auto *NonCopy :
reverse(NonCopyInstrs)) {
3401 B.setInsertPt(
B.getMBB(), Start);
3405 case AMDGPU::G_LOAD:
3406 case AMDGPU::G_ZEXTLOAD:
3407 case AMDGPU::G_SEXTLOAD: {
3412 case AMDGPU::G_DYN_STACKALLOC:
3415 case AMDGPU::G_STACKRESTORE: {
3420 case AMDGPU::G_SBFX:
3423 case AMDGPU::G_UBFX:
3426 case AMDGPU::G_AMDGPU_MAD_U64_U32:
3427 case AMDGPU::G_AMDGPU_MAD_I64_I32:
3430 case AMDGPU::G_PREFETCH: {
3432 MI.eraseFromParent();
3437 if (PtrBank == AMDGPU::VGPRRegBankID) {
3438 MI.eraseFromParent();
3441 unsigned AS =
MRI.getType(PtrReg).getAddressSpace();
3444 MI.eraseFromParent();
3462 if (RB0 == AMDGPU::InvalidRegBankID)
3464 if (RB1 == AMDGPU::InvalidRegBankID)
3467 if (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID)
3468 return AMDGPU::SGPRRegBankID;
3470 if (RB0 == AMDGPU::AGPRRegBankID && RB1 == AMDGPU::AGPRRegBankID)
3471 return AMDGPU::AGPRRegBankID;
3473 return AMDGPU::VGPRRegBankID;
3477 if (RB0 == AMDGPU::InvalidRegBankID)
3479 if (RB1 == AMDGPU::InvalidRegBankID)
3485 if (RB0 == AMDGPU::VCCRegBankID || RB1 == AMDGPU::VCCRegBankID)
3486 return AMDGPU::VCCRegBankID;
3494 unsigned RegBank = AMDGPU::InvalidRegBankID;
3502 if (RegBank == AMDGPU::VGPRRegBankID)
3518 if (Bank->getID() != AMDGPU::SGPRRegBankID)
3531 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3537 OpdsMapping[i] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3540 MI.getNumOperands());
3553 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
3559 unsigned BankID =
Size == 1 ? AMDGPU::VCCRegBankID : AMDGPU::VGPRRegBankID;
3560 OpdsMapping[i] = AMDGPU::getValueMapping(BankID,
Size);
3564 MI.getNumOperands());
3573 for (
unsigned I = 0, E =
MI.getNumOperands();
I != E; ++
I) {
3579 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3583 MI.getNumOperands());
3589 int RsrcIdx)
const {
3592 RsrcIdx +=
MI.getNumExplicitDefs() + 1;
3594 const int NumOps =
MI.getNumOperands();
3599 for (
int I = 0;
I != NumOps; ++
I) {
3600 if (!
MI.getOperand(
I).isReg())
3614 const bool MustBeSGPR =
I == RsrcIdx ||
I == RsrcIdx + 1;
3619 OpdsMapping[
I] = AMDGPU::getValueMapping(NewBank,
Size);
3622 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3633 LLT PtrTy =
MRI.getType(PtrReg);
3637 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3642 return AMDGPU::getValueMapping(PtrBank->
getID(),
Size);
3653 LLT PtrTy =
MRI.getType(PtrReg);
3665 ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
3666 PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize);
3668 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3673 AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
3675 PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
3678 ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3679 PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
3682 OpdsMapping[0] = ValMapping;
3683 OpdsMapping[1] = PtrMapping;
3708 return AMDGPU::getValueMapping(Bank,
Size);
3716 return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
3724 return AMDGPU::getValueMapping(AMDGPU::AGPRRegBankID,
Size);
3741 if (
MI.isCopy() ||
MI.getOpcode() == AMDGPU::G_FREEZE) {
3748 assert(SrcBank &&
"src bank should have been assigned already");
3753 if (
MI.getOpcode() != AMDGPU::G_FREEZE &&
3758 unsigned OpdsMappingSize =
MI.isCopy() ? 1 : 2;
3760 OpdsMapping[0] = &ValMap;
3761 if (
MI.getOpcode() == AMDGPU::G_FREEZE)
3762 OpdsMapping[1] = &ValMap;
3769 if (
MI.isRegSequence()) {
3772 unsigned BankID = AMDGPU::SGPRRegBankID;
3774 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
3778 if (OpBank != AMDGPU::SGPRRegBankID) {
3779 BankID = AMDGPU::VGPRRegBankID;
3795 if (
auto *
PHI = dyn_cast<GPhi>(&
MI)) {
3796 unsigned ResultBank = AMDGPU::InvalidRegBankID;
3801 ResultBank = DstBank->
getID();
3803 for (
unsigned I = 0;
I <
PHI->getNumIncomingValues(); ++
I) {
3808 if (!Bank || Bank->
getID() == AMDGPU::VGPRRegBankID) {
3809 ResultBank = AMDGPU::VGPRRegBankID;
3814 unsigned OpBank = Bank->
getID();
3818 assert(ResultBank != AMDGPU::InvalidRegBankID);
3820 unsigned Size =
MRI.getType(DstReg).getSizeInBits();
3835 switch (
MI.getOpcode()) {
3842 case AMDGPU::G_MUL: {
3843 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3848 unsigned TargetBankID = AMDGPU::InvalidRegBankID;
3849 unsigned BankLHS = AMDGPU::InvalidRegBankID;
3850 unsigned BankRHS = AMDGPU::InvalidRegBankID;
3852 TargetBankID = DstBank->
getID();
3853 if (DstBank == &AMDGPU::VCCRegBank) {
3854 TargetBankID = AMDGPU::VCCRegBankID;
3855 BankLHS = AMDGPU::VCCRegBankID;
3856 BankRHS = AMDGPU::VCCRegBankID;
3859 AMDGPU::SGPRRegBankID);
3861 AMDGPU::SGPRRegBankID);
3865 AMDGPU::VCCRegBankID);
3867 AMDGPU::VCCRegBankID);
3870 if (BankLHS == AMDGPU::VGPRRegBankID || BankRHS == AMDGPU::VGPRRegBankID) {
3871 TargetBankID = AMDGPU::VGPRRegBankID;
3872 }
else if (BankLHS == AMDGPU::VCCRegBankID || BankRHS == AMDGPU::VCCRegBankID) {
3873 TargetBankID = AMDGPU::VCCRegBankID;
3874 BankLHS = AMDGPU::VCCRegBankID;
3875 BankRHS = AMDGPU::VCCRegBankID;
3876 }
else if (BankLHS == AMDGPU::SGPRRegBankID && BankRHS == AMDGPU::SGPRRegBankID) {
3877 TargetBankID = AMDGPU::SGPRRegBankID;
3881 OpdsMapping[0] = AMDGPU::getValueMapping(TargetBankID,
Size);
3882 OpdsMapping[1] = AMDGPU::getValueMapping(BankLHS,
Size);
3883 OpdsMapping[2] = AMDGPU::getValueMapping(BankRHS,
Size);
3890 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::SGPRRegBankID,
Size);
3891 OpdsMapping[1] = OpdsMapping[2] = OpdsMapping[0];
3893 OpdsMapping[0] = getValueMappingSGPR64Only(AMDGPU::VGPRRegBankID,
Size);
3895 OpdsMapping[1] = AMDGPU::getValueMapping(Bank1,
Size);
3898 OpdsMapping[2] = AMDGPU::getValueMapping(Bank2,
Size);
3906 case AMDGPU::G_PTR_ADD:
3907 case AMDGPU::G_PTRMASK:
3911 case AMDGPU::G_LSHR:
3912 case AMDGPU::G_ASHR:
3913 case AMDGPU::G_UADDO:
3914 case AMDGPU::G_USUBO:
3915 case AMDGPU::G_UADDE:
3916 case AMDGPU::G_SADDE:
3917 case AMDGPU::G_USUBE:
3918 case AMDGPU::G_SSUBE:
3919 case AMDGPU::G_SMIN:
3920 case AMDGPU::G_SMAX:
3921 case AMDGPU::G_UMIN:
3922 case AMDGPU::G_UMAX:
3924 case AMDGPU::G_SHUFFLE_VECTOR:
3925 case AMDGPU::G_SBFX:
3926 case AMDGPU::G_UBFX:
3927 case AMDGPU::G_AMDGPU_S_MUL_I64_I32:
3928 case AMDGPU::G_AMDGPU_S_MUL_U64_U32:
3932 case AMDGPU::G_FADD:
3933 case AMDGPU::G_FSUB:
3934 case AMDGPU::G_FMUL:
3936 case AMDGPU::G_FFLOOR:
3937 case AMDGPU::G_FCEIL:
3938 case AMDGPU::G_INTRINSIC_ROUNDEVEN:
3939 case AMDGPU::G_FMINNUM:
3940 case AMDGPU::G_FMAXNUM:
3941 case AMDGPU::G_FMINIMUM:
3942 case AMDGPU::G_FMAXIMUM:
3943 case AMDGPU::G_INTRINSIC_TRUNC:
3944 case AMDGPU::G_STRICT_FADD:
3945 case AMDGPU::G_STRICT_FSUB:
3946 case AMDGPU::G_STRICT_FMUL:
3947 case AMDGPU::G_STRICT_FMA: {
3948 LLT Ty =
MRI.getType(
MI.getOperand(0).getReg());
3955 case AMDGPU::G_FPTOSI:
3956 case AMDGPU::G_FPTOUI:
3957 case AMDGPU::G_SITOFP:
3958 case AMDGPU::G_UITOFP: {
3959 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3960 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3966 case AMDGPU::G_FPTRUNC:
3967 case AMDGPU::G_FPEXT: {
3968 unsigned SizeDst =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3969 unsigned SizeSrc =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
3975 case AMDGPU::G_FSQRT:
3976 case AMDGPU::G_FEXP2:
3977 case AMDGPU::G_FLOG2: {
3978 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
3984 case AMDGPU::G_SADDSAT:
3985 case AMDGPU::G_SSUBSAT:
3986 case AMDGPU::G_UADDSAT:
3987 case AMDGPU::G_USUBSAT:
3988 case AMDGPU::G_FMAD:
3989 case AMDGPU::G_FLDEXP:
3990 case AMDGPU::G_FMINNUM_IEEE:
3991 case AMDGPU::G_FMAXNUM_IEEE:
3992 case AMDGPU::G_FCANONICALIZE:
3993 case AMDGPU::G_STRICT_FLDEXP:
3994 case AMDGPU::G_BSWAP:
3995 case AMDGPU::G_FSHR:
3996 case AMDGPU::G_AMDGPU_FMIN_LEGACY:
3997 case AMDGPU::G_AMDGPU_FMAX_LEGACY:
3998 case AMDGPU::G_AMDGPU_RCP_IFLAG:
3999 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE0:
4000 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE1:
4001 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE2:
4002 case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
4003 case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
4004 case AMDGPU::G_AMDGPU_SMED3:
4005 case AMDGPU::G_AMDGPU_FMED3:
4007 case AMDGPU::G_UMULH:
4008 case AMDGPU::G_SMULH: {
4013 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4014 case AMDGPU::G_AMDGPU_MAD_I64_I32: {
4023 bool AllSalu =
true;
4024 bool MulSalu =
true;
4025 for (
unsigned i = 0; i < 5; ++i) {
4028 if (Bank->getID() != AMDGPU::SGPRRegBankID) {
4030 if (i == 2 || i == 3) {
4048 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4049 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4050 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4051 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4052 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 64);
4055 case AMDGPU::G_IMPLICIT_DEF: {
4056 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4057 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4060 case AMDGPU::G_FCONSTANT:
4061 case AMDGPU::G_CONSTANT:
4062 case AMDGPU::G_GLOBAL_VALUE:
4063 case AMDGPU::G_BLOCK_ADDR:
4064 case AMDGPU::G_READSTEADYCOUNTER:
4065 case AMDGPU::G_READCYCLECOUNTER: {
4066 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4067 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4070 case AMDGPU::G_FRAME_INDEX: {
4073 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4074 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4077 case AMDGPU::G_DYN_STACKALLOC: {
4079 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4081 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, 32);
4084 case AMDGPU::G_AMDGPU_WAVE_ADDRESS: {
4089 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4090 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
4093 case AMDGPU::G_INSERT: {
4098 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4099 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4100 OpdsMapping[2] = AMDGPU::getValueMapping(BankID, EltSize);
4101 OpdsMapping[3] =
nullptr;
4104 case AMDGPU::G_EXTRACT: {
4108 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, DstSize);
4109 OpdsMapping[1] = AMDGPU::getValueMapping(BankID, SrcSize);
4110 OpdsMapping[2] =
nullptr;
4113 case AMDGPU::G_BUILD_VECTOR:
4114 case AMDGPU::G_BUILD_VECTOR_TRUNC: {
4115 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
4118 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4121 unsigned DstBankID =
regBankUnion(Src0BankID, Src1BankID);
4123 OpdsMapping[0] = AMDGPU::getValueMapping(DstBankID, DstSize);
4124 OpdsMapping[1] = AMDGPU::getValueMapping(Src0BankID, SrcSize);
4125 OpdsMapping[2] = AMDGPU::getValueMapping(Src1BankID, SrcSize);
4131 case AMDGPU::G_MERGE_VALUES:
4132 case AMDGPU::G_CONCAT_VECTORS: {
4134 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4135 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4137 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4139 for (
unsigned i = 1, e =
MI.getNumOperands(); i != e; ++i)
4140 OpdsMapping[i] = AMDGPU::getValueMapping(Bank, SrcSize);
4143 case AMDGPU::G_BITREVERSE:
4144 case AMDGPU::G_BITCAST:
4145 case AMDGPU::G_INTTOPTR:
4146 case AMDGPU::G_PTRTOINT:
4147 case AMDGPU::G_FABS:
4148 case AMDGPU::G_FNEG: {
4149 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4151 OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4154 case AMDGPU::G_AMDGPU_FFBH_U32:
4155 case AMDGPU::G_AMDGPU_FFBL_B32:
4156 case AMDGPU::G_CTLZ_ZERO_UNDEF:
4157 case AMDGPU::G_CTTZ_ZERO_UNDEF: {
4158 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4160 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4161 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID,
Size);
4164 case AMDGPU::G_CTPOP: {
4165 unsigned Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4167 OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
4172 OpdsMapping[1] = AMDGPU::getValueMapping(BankID,
Size);
4175 case AMDGPU::G_TRUNC: {
4181 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, DstSize);
4182 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, SrcSize);
4185 case AMDGPU::G_ZEXT:
4186 case AMDGPU::G_SEXT:
4187 case AMDGPU::G_ANYEXT:
4188 case AMDGPU::G_SEXT_INREG: {
4197 switch (SrcBank->
getID()) {
4198 case AMDGPU::SGPRRegBankID:
4199 DstBank = AMDGPU::SGPRRegBankID;
4202 DstBank = AMDGPU::VGPRRegBankID;
4208 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
4209 OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->
getID(),
4213 case AMDGPU::G_IS_FPCLASS: {
4215 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4216 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4217 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4218 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4221 case AMDGPU::G_STORE: {
4223 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4228 AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4229 OpdsMapping[0] = ValMapping;
4233 case AMDGPU::G_ICMP:
4234 case AMDGPU::G_FCMP: {
4235 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4240 AMDGPU::SGPRRegBankID);
4244 auto canUseSCCICMP = [&]() {
4247 return Size == 32 ||
4252 auto canUseSCCFCMP = [&]() {
4256 bool isICMP =
MI.getOpcode() == AMDGPU::G_ICMP;
4257 bool CanUseSCC = DstBank == AMDGPU::SGPRRegBankID &&
4258 Op2Bank == AMDGPU::SGPRRegBankID &&
4259 Op3Bank == AMDGPU::SGPRRegBankID &&
4260 (isICMP ? canUseSCCICMP() : canUseSCCFCMP());
4262 DstBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
4263 unsigned SrcBank = CanUseSCC ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4267 const unsigned ResultSize = 1;
4269 OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, ResultSize);
4270 OpdsMapping[1] =
nullptr;
4271 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank,
Size);
4272 OpdsMapping[3] = AMDGPU::getValueMapping(SrcBank,
Size);
4275 case AMDGPU::G_EXTRACT_VECTOR_ELT: {
4278 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4279 unsigned SrcSize =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4280 unsigned IdxSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4282 unsigned OutputBankID =
regBankUnion(SrcBankID, IdxBank);
4284 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(OutputBankID, DstSize);
4285 OpdsMapping[1] = AMDGPU::getValueMapping(SrcBankID, SrcSize);
4288 OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4291 case AMDGPU::G_INSERT_VECTOR_ELT: {
4293 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
4295 unsigned VecSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4296 unsigned InsertSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4297 unsigned IdxSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4301 OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4302 OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize);
4306 if (InsertSize == 64 && OutputBankID == AMDGPU::VGPRRegBankID) {
4307 OpdsMapping[2] = AMDGPU::getValueMappingSplit64(InsertEltBankID,
4310 assert(InsertSize == 32 || InsertSize == 64);
4311 OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBankID, InsertSize);
4315 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBankID, IdxSize);
4318 case AMDGPU::G_UNMERGE_VALUES: {
4323 for (
unsigned i = 0, e =
MI.getNumOperands(); i != e; ++i) {
4325 OpdsMapping[i] = AMDGPU::getValueMapping(Bank,
Size);
4329 case AMDGPU::G_AMDGPU_BUFFER_LOAD:
4330 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
4331 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
4332 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
4333 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
4334 case AMDGPU::G_AMDGPU_BUFFER_LOAD_TFE:
4335 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE_TFE:
4336 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE_TFE:
4337 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT_TFE:
4338 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT_TFE:
4339 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT:
4340 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_TFE:
4341 case AMDGPU::G_AMDGPU_BUFFER_LOAD_FORMAT_D16:
4342 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT:
4343 case AMDGPU::G_AMDGPU_TBUFFER_LOAD_FORMAT_D16:
4344 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT:
4345 case AMDGPU::G_AMDGPU_TBUFFER_STORE_FORMAT_D16:
4346 case AMDGPU::G_AMDGPU_BUFFER_STORE:
4347 case AMDGPU::G_AMDGPU_BUFFER_STORE_BYTE:
4348 case AMDGPU::G_AMDGPU_BUFFER_STORE_SHORT:
4349 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT:
4350 case AMDGPU::G_AMDGPU_BUFFER_STORE_FORMAT_D16: {
4369 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP:
4370 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD:
4371 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB:
4372 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN:
4373 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN:
4374 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX:
4375 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX:
4376 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND:
4377 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR:
4378 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR:
4379 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC:
4380 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC:
4381 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD:
4382 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN:
4383 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX: {
4406 case AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP: {
4432 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD:
4433 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
4434 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE:
4435 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
4436 case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT: {
4444 unsigned RSrcBank = OpdsMapping[1]->BreakDown[0].RegBank->getID();
4445 unsigned OffsetBank = OpdsMapping[2]->BreakDown[0].RegBank->getID();
4446 unsigned ResultBank =
regBankUnion(RSrcBank, OffsetBank);
4448 unsigned Size0 =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4449 OpdsMapping[0] = AMDGPU::getValueMapping(ResultBank, Size0);
4452 case AMDGPU::G_INTRINSIC:
4453 case AMDGPU::G_INTRINSIC_CONVERGENT: {
4457 case Intrinsic::amdgcn_div_fmas:
4458 case Intrinsic::amdgcn_div_fixup:
4459 case Intrinsic::amdgcn_trig_preop:
4460 case Intrinsic::amdgcn_sin:
4461 case Intrinsic::amdgcn_cos:
4462 case Intrinsic::amdgcn_log_clamp:
4463 case Intrinsic::amdgcn_rcp_legacy:
4464 case Intrinsic::amdgcn_rsq_legacy:
4465 case Intrinsic::amdgcn_rsq_clamp:
4466 case Intrinsic::amdgcn_fmul_legacy:
4467 case Intrinsic::amdgcn_fma_legacy:
4468 case Intrinsic::amdgcn_frexp_mant:
4469 case Intrinsic::amdgcn_frexp_exp:
4470 case Intrinsic::amdgcn_fract:
4471 case Intrinsic::amdgcn_cvt_pknorm_i16:
4472 case Intrinsic::amdgcn_cvt_pknorm_u16:
4473 case Intrinsic::amdgcn_cvt_pk_i16:
4474 case Intrinsic::amdgcn_cvt_pk_u16:
4475 case Intrinsic::amdgcn_fmed3:
4476 case Intrinsic::amdgcn_cubeid:
4477 case Intrinsic::amdgcn_cubema:
4478 case Intrinsic::amdgcn_cubesc:
4479 case Intrinsic::amdgcn_cubetc:
4480 case Intrinsic::amdgcn_sffbh:
4481 case Intrinsic::amdgcn_fmad_ftz:
4482 case Intrinsic::amdgcn_mbcnt_lo:
4483 case Intrinsic::amdgcn_mbcnt_hi:
4484 case Intrinsic::amdgcn_mul_u24:
4485 case Intrinsic::amdgcn_mul_i24:
4486 case Intrinsic::amdgcn_mulhi_u24:
4487 case Intrinsic::amdgcn_mulhi_i24:
4488 case Intrinsic::amdgcn_lerp:
4489 case Intrinsic::amdgcn_sad_u8:
4490 case Intrinsic::amdgcn_msad_u8:
4491 case Intrinsic::amdgcn_sad_hi_u8:
4492 case Intrinsic::amdgcn_sad_u16:
4493 case Intrinsic::amdgcn_qsad_pk_u16_u8:
4494 case Intrinsic::amdgcn_mqsad_pk_u16_u8:
4495 case Intrinsic::amdgcn_mqsad_u32_u8:
4496 case Intrinsic::amdgcn_cvt_pk_u8_f32:
4497 case Intrinsic::amdgcn_alignbyte:
4498 case Intrinsic::amdgcn_perm:
4499 case Intrinsic::amdgcn_fdot2:
4500 case Intrinsic::amdgcn_sdot2:
4501 case Intrinsic::amdgcn_udot2:
4502 case Intrinsic::amdgcn_sdot4:
4503 case Intrinsic::amdgcn_udot4:
4504 case Intrinsic::amdgcn_sdot8:
4505 case Intrinsic::amdgcn_udot8:
4506 case Intrinsic::amdgcn_fdot2_bf16_bf16:
4507 case Intrinsic::amdgcn_fdot2_f16_f16:
4508 case Intrinsic::amdgcn_fdot2_f32_bf16:
4509 case Intrinsic::amdgcn_sudot4:
4510 case Intrinsic::amdgcn_sudot8:
4511 case Intrinsic::amdgcn_dot4_f32_fp8_bf8:
4512 case Intrinsic::amdgcn_dot4_f32_bf8_fp8:
4513 case Intrinsic::amdgcn_dot4_f32_fp8_fp8:
4514 case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
4515 case Intrinsic::amdgcn_cvt_f32_fp8:
4516 case Intrinsic::amdgcn_cvt_f32_bf8:
4517 case Intrinsic::amdgcn_cvt_pk_f32_fp8:
4518 case Intrinsic::amdgcn_cvt_pk_f32_bf8:
4519 case Intrinsic::amdgcn_cvt_pk_fp8_f32:
4520 case Intrinsic::amdgcn_cvt_pk_bf8_f32:
4521 case Intrinsic::amdgcn_cvt_sr_fp8_f32:
4522 case Intrinsic::amdgcn_cvt_sr_bf8_f32:
4523 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16:
4524 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16:
4525 case Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied:
4526 case Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied:
4527 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf16:
4528 case Intrinsic::amdgcn_wmma_f32_16x16x16_f16:
4529 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu4:
4530 case Intrinsic::amdgcn_wmma_i32_16x16x16_iu8:
4531 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8:
4532 case Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8:
4533 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8:
4534 case Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8:
4535 case Intrinsic::amdgcn_wmma_i32_16x16x32_iu4:
4536 case Intrinsic::amdgcn_swmmac_f32_16x16x32_f16:
4537 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16:
4538 case Intrinsic::amdgcn_swmmac_f16_16x16x32_f16:
4539 case Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16:
4540 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8:
4541 case Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4:
4542 case Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4:
4543 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8:
4544 case Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8:
4545 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8:
4546 case Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8:
4548 case Intrinsic::amdgcn_log:
4549 case Intrinsic::amdgcn_exp2:
4550 case Intrinsic::amdgcn_rcp:
4551 case Intrinsic::amdgcn_rsq:
4552 case Intrinsic::amdgcn_sqrt: {
4553 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4559 case Intrinsic::amdgcn_sbfe:
4560 case Intrinsic::amdgcn_ubfe:
4564 case Intrinsic::amdgcn_ds_swizzle:
4565 case Intrinsic::amdgcn_ds_permute:
4566 case Intrinsic::amdgcn_ds_bpermute:
4567 case Intrinsic::amdgcn_update_dpp:
4568 case Intrinsic::amdgcn_mov_dpp8:
4569 case Intrinsic::amdgcn_mov_dpp:
4570 case Intrinsic::amdgcn_strict_wwm:
4571 case Intrinsic::amdgcn_wwm:
4572 case Intrinsic::amdgcn_strict_wqm:
4573 case Intrinsic::amdgcn_wqm:
4574 case Intrinsic::amdgcn_softwqm:
4575 case Intrinsic::amdgcn_set_inactive:
4576 case Intrinsic::amdgcn_set_inactive_chain_arg:
4577 case Intrinsic::amdgcn_permlane64:
4579 case Intrinsic::amdgcn_cvt_pkrtz:
4583 case Intrinsic::amdgcn_kernarg_segment_ptr:
4584 case Intrinsic::amdgcn_s_getpc:
4585 case Intrinsic::amdgcn_groupstaticsize:
4586 case Intrinsic::amdgcn_reloc_constant:
4587 case Intrinsic::returnaddress: {
4588 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4589 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4592 case Intrinsic::amdgcn_wqm_vote: {
4593 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4594 OpdsMapping[0] = OpdsMapping[2]
4595 = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID,
Size);
4598 case Intrinsic::amdgcn_ps_live: {
4599 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4602 case Intrinsic::amdgcn_div_scale: {
4603 unsigned Dst0Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4604 unsigned Dst1Size =
MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
4605 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Dst0Size);
4606 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, Dst1Size);
4608 unsigned SrcSize =
MRI.getType(
MI.getOperand(3).getReg()).getSizeInBits();
4609 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4610 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4613 case Intrinsic::amdgcn_class: {
4614 Register Src0Reg =
MI.getOperand(2).getReg();
4615 Register Src1Reg =
MI.getOperand(3).getReg();
4616 unsigned Src0Size =
MRI.getType(Src0Reg).getSizeInBits();
4617 unsigned Src1Size =
MRI.getType(Src1Reg).getSizeInBits();
4618 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4619 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, DstSize);
4620 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src0Size);
4621 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Src1Size);
4624 case Intrinsic::amdgcn_icmp:
4625 case Intrinsic::amdgcn_fcmp: {
4626 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4628 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4629 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4630 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4631 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, OpSize);
4634 case Intrinsic::amdgcn_readlane: {
4637 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4639 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4642 case Intrinsic::amdgcn_readfirstlane: {
4643 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4644 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4645 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4646 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4649 case Intrinsic::amdgcn_writelane: {
4650 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4652 unsigned SrcSize =
MRI.getType(SrcReg).getSizeInBits();
4655 unsigned IdxSize =
MRI.getType(IdxReg).getSizeInBits();
4657 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4661 OpdsMapping[2] = AMDGPU::getValueMapping(SrcBank, SrcSize);
4662 OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize);
4663 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize);
4666 case Intrinsic::amdgcn_if_break: {
4668 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4669 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4670 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4673 case Intrinsic::amdgcn_permlane16:
4674 case Intrinsic::amdgcn_permlanex16: {
4676 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4677 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4678 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4683 case Intrinsic::amdgcn_permlane16_var:
4684 case Intrinsic::amdgcn_permlanex16_var: {
4686 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4687 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4688 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4689 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4692 case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
4693 case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
4694 case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
4695 case Intrinsic::amdgcn_mfma_f32_4x4x2bf16:
4696 case Intrinsic::amdgcn_mfma_f32_16x16x1f32:
4697 case Intrinsic::amdgcn_mfma_f32_16x16x4f32:
4698 case Intrinsic::amdgcn_mfma_f32_16x16x4f16:
4699 case Intrinsic::amdgcn_mfma_f32_16x16x16f16:
4700 case Intrinsic::amdgcn_mfma_i32_16x16x4i8:
4701 case Intrinsic::amdgcn_mfma_i32_16x16x16i8:
4702 case Intrinsic::amdgcn_mfma_f32_16x16x2bf16:
4703 case Intrinsic::amdgcn_mfma_f32_16x16x8bf16:
4704 case Intrinsic::amdgcn_mfma_f32_32x32x1f32:
4705 case Intrinsic::amdgcn_mfma_f32_32x32x2f32:
4706 case Intrinsic::amdgcn_mfma_f32_32x32x4f16:
4707 case Intrinsic::amdgcn_mfma_f32_32x32x8f16:
4708 case Intrinsic::amdgcn_mfma_i32_32x32x4i8:
4709 case Intrinsic::amdgcn_mfma_i32_32x32x8i8:
4710 case Intrinsic::amdgcn_mfma_f32_32x32x2bf16:
4711 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16:
4712 case Intrinsic::amdgcn_mfma_f32_32x32x4bf16_1k:
4713 case Intrinsic::amdgcn_mfma_f32_16x16x4bf16_1k:
4714 case Intrinsic::amdgcn_mfma_f32_4x4x4bf16_1k:
4715 case Intrinsic::amdgcn_mfma_f32_32x32x8bf16_1k:
4716 case Intrinsic::amdgcn_mfma_f32_16x16x16bf16_1k:
4717 case Intrinsic::amdgcn_mfma_f64_16x16x4f64:
4718 case Intrinsic::amdgcn_mfma_f64_4x4x4f64:
4719 case Intrinsic::amdgcn_mfma_i32_16x16x32_i8:
4720 case Intrinsic::amdgcn_mfma_i32_32x32x16_i8:
4721 case Intrinsic::amdgcn_mfma_f32_16x16x8_xf32:
4722 case Intrinsic::amdgcn_mfma_f32_32x32x4_xf32:
4723 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_bf8:
4724 case Intrinsic::amdgcn_mfma_f32_16x16x32_bf8_fp8:
4725 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_bf8:
4726 case Intrinsic::amdgcn_mfma_f32_16x16x32_fp8_fp8:
4727 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_bf8:
4728 case Intrinsic::amdgcn_mfma_f32_32x32x16_bf8_fp8:
4729 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_bf8:
4730 case Intrinsic::amdgcn_mfma_f32_32x32x16_fp8_fp8: {
4739 Info->mayNeedAGPRs()
4745 Info->mayNeedAGPRs()
4750 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
4751 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
4752 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
4753 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
4754 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
4755 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
4756 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
4757 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
4758 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
4759 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
4760 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
4761 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
4762 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
4763 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8: {
4772 case Intrinsic::amdgcn_interp_p1:
4773 case Intrinsic::amdgcn_interp_p2:
4774 case Intrinsic::amdgcn_interp_mov:
4775 case Intrinsic::amdgcn_interp_p1_f16:
4776 case Intrinsic::amdgcn_interp_p2_f16:
4777 case Intrinsic::amdgcn_lds_param_load: {
4778 const int M0Idx =
MI.getNumOperands() - 1;
4779 Register M0Reg =
MI.getOperand(M0Idx).getReg();
4781 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4783 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4784 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
4785 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4789 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
4792 case Intrinsic::amdgcn_interp_inreg_p10:
4793 case Intrinsic::amdgcn_interp_inreg_p2:
4794 case Intrinsic::amdgcn_interp_inreg_p10_f16:
4795 case Intrinsic::amdgcn_interp_inreg_p2_f16:
4796 case Intrinsic::amdgcn_interp_p10_rtz_f16:
4797 case Intrinsic::amdgcn_interp_p2_rtz_f16: {
4798 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4799 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4800 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4801 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4802 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4805 case Intrinsic::amdgcn_ballot: {
4806 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4807 unsigned SrcSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4808 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4809 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
4812 case Intrinsic::amdgcn_inverse_ballot: {
4814 Register MaskReg =
MI.getOperand(2).getReg();
4815 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4816 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4817 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4818 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4821 case Intrinsic::amdgcn_s_quadmask:
4822 case Intrinsic::amdgcn_s_wqm: {
4823 Register MaskReg =
MI.getOperand(2).getReg();
4824 unsigned MaskSize =
MRI.getType(MaskReg).getSizeInBits();
4825 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4826 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, MaskSize);
4827 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
4830 case Intrinsic::amdgcn_wave_reduce_umin:
4831 case Intrinsic::amdgcn_wave_reduce_umax: {
4832 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4833 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
4834 unsigned OpSize =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4837 OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
4840 case Intrinsic::amdgcn_s_bitreplicate:
4841 Register MaskReg =
MI.getOperand(2).getReg();
4842 unsigned MaskBank =
getRegBankID(MaskReg,
MRI, AMDGPU::SGPRRegBankID);
4843 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
4844 OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
4848 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4849 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4850 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4851 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4852 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4855 assert(RSrcIntrin &&
"missing RsrcIntrinsic for image intrinsic");
4862 case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
4863 unsigned N =
MI.getNumExplicitOperands() - 2;
4864 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
4868 unsigned Size =
MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits();
4871 OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4874 for (
unsigned I = 2;
I <
N; ++
I) {
4875 unsigned Size =
MRI.getType(
MI.getOperand(
I).getReg()).getSizeInBits();
4876 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID,
Size);
4881 case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
4882 case AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: {
4883 auto IntrID = cast<GIntrinsic>(
MI).getIntrinsicID();
4885 case Intrinsic::amdgcn_s_getreg:
4886 case Intrinsic::amdgcn_s_memtime:
4887 case Intrinsic::amdgcn_s_memrealtime:
4888 case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
4889 case Intrinsic::amdgcn_s_sendmsg_rtn: {
4890 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4891 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4894 case Intrinsic::amdgcn_global_atomic_fadd:
4895 case Intrinsic::amdgcn_global_atomic_csub:
4896 case Intrinsic::amdgcn_global_atomic_fmin:
4897 case Intrinsic::amdgcn_global_atomic_fmax:
4898 case Intrinsic::amdgcn_global_atomic_fmin_num:
4899 case Intrinsic::amdgcn_global_atomic_fmax_num:
4900 case Intrinsic::amdgcn_flat_atomic_fadd:
4901 case Intrinsic::amdgcn_flat_atomic_fmin:
4902 case Intrinsic::amdgcn_flat_atomic_fmax:
4903 case Intrinsic::amdgcn_flat_atomic_fmin_num:
4904 case Intrinsic::amdgcn_flat_atomic_fmax_num:
4905 case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
4906 case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
4907 case Intrinsic::amdgcn_atomic_cond_sub_u32:
4908 case Intrinsic::amdgcn_global_atomic_ordered_add_b64:
4909 case Intrinsic::amdgcn_global_load_tr_b64:
4910 case Intrinsic::amdgcn_global_load_tr_b128:
4912 case Intrinsic::amdgcn_ds_ordered_add:
4913 case Intrinsic::amdgcn_ds_ordered_swap: {
4914 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4915 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4917 AMDGPU::SGPRRegBankID);
4918 OpdsMapping[2] = AMDGPU::getValueMapping(M0Bank, 32);
4919 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4922 case Intrinsic::amdgcn_ds_append:
4923 case Intrinsic::amdgcn_ds_consume: {
4924 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
4925 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
4929 case Intrinsic::amdgcn_exp_compr:
4930 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4931 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4933 case Intrinsic::amdgcn_exp:
4935 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4936 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4937 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4938 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4940 case Intrinsic::amdgcn_exp_row:
4941 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4942 OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4943 OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4944 OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
4947 case Intrinsic::amdgcn_s_sendmsg:
4948 case Intrinsic::amdgcn_s_sendmsghalt: {
4951 AMDGPU::SGPRRegBankID);
4952 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4955 case Intrinsic::amdgcn_s_setreg: {
4958 AMDGPU::SGPRRegBankID);
4959 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
4962 case Intrinsic::amdgcn_s_ttracedata: {
4966 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
4969 case Intrinsic::amdgcn_end_cf: {
4971 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
4974 case Intrinsic::amdgcn_else: {
4976 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4977 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4978 OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, WaveSize);
4981 case Intrinsic::amdgcn_live_mask: {
4982 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4985 case Intrinsic::amdgcn_wqm_demote:
4986 case Intrinsic::amdgcn_kill: {
4987 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
4990 case Intrinsic::amdgcn_raw_buffer_load:
4991 case Intrinsic::amdgcn_raw_ptr_buffer_load:
4992 case Intrinsic::amdgcn_raw_atomic_buffer_load:
4993 case Intrinsic::amdgcn_raw_ptr_atomic_buffer_load:
4994 case Intrinsic::amdgcn_raw_tbuffer_load:
4995 case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
5004 case Intrinsic::amdgcn_raw_buffer_load_lds:
5005 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
5012 case Intrinsic::amdgcn_raw_buffer_store:
5013 case Intrinsic::amdgcn_raw_ptr_buffer_store:
5014 case Intrinsic::amdgcn_raw_buffer_store_format:
5015 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
5016 case Intrinsic::amdgcn_raw_tbuffer_store:
5017 case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
5024 case Intrinsic::amdgcn_struct_buffer_load:
5025 case Intrinsic::amdgcn_struct_ptr_buffer_load:
5026 case Intrinsic::amdgcn_struct_tbuffer_load:
5027 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
5028 case Intrinsic::amdgcn_struct_atomic_buffer_load:
5029 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load: {
5037 case Intrinsic::amdgcn_struct_buffer_load_lds:
5038 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
5046 case Intrinsic::amdgcn_struct_buffer_store:
5047 case Intrinsic::amdgcn_struct_ptr_buffer_store:
5048 case Intrinsic::amdgcn_struct_tbuffer_store:
5049 case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
5057 case Intrinsic::amdgcn_init_exec_from_input: {
5059 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID,
Size);
5062 case Intrinsic::amdgcn_ds_gws_init:
5063 case Intrinsic::amdgcn_ds_gws_barrier:
5064 case Intrinsic::amdgcn_ds_gws_sema_br: {
5065 OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5069 AMDGPU::SGPRRegBankID);
5070 OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
5073 case Intrinsic::amdgcn_ds_gws_sema_v:
5074 case Intrinsic::amdgcn_ds_gws_sema_p:
5075 case Intrinsic::amdgcn_ds_gws_sema_release_all: {
5078 AMDGPU::SGPRRegBankID);
5079 OpdsMapping[1] = AMDGPU::getValueMapping(Bank, 32);
5082 case Intrinsic::amdgcn_global_load_lds: {
5087 case Intrinsic::amdgcn_lds_direct_load: {
5088 const int M0Idx =
MI.getNumOperands() - 1;
5089 Register M0Reg =
MI.getOperand(M0Idx).getReg();
5091 unsigned DstSize =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5093 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
5094 for (
int I = 2;
I != M0Idx &&
MI.getOperand(
I).
isReg(); ++
I)
5095 OpdsMapping[
I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
5099 OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
5102 case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
5103 case Intrinsic::amdgcn_ds_sub_gs_reg_rtn:
5107 case Intrinsic::amdgcn_ds_bvh_stack_rtn: {
5120 case Intrinsic::amdgcn_s_sleep_var:
5123 case Intrinsic::amdgcn_s_barrier_signal_var:
5124 case Intrinsic::amdgcn_s_barrier_join:
5125 case Intrinsic::amdgcn_s_wakeup_barrier:
5128 case Intrinsic::amdgcn_s_barrier_init:
5132 case Intrinsic::amdgcn_s_barrier_signal_isfirst_var: {
5133 const unsigned ResultSize = 1;
5135 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5139 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
5140 case Intrinsic::amdgcn_s_barrier_leave: {
5141 const unsigned ResultSize = 1;
5143 AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, ResultSize);
5146 case Intrinsic::amdgcn_s_get_barrier_state: {
5151 case Intrinsic::amdgcn_pops_exiting_wave_id:
5158 case AMDGPU::G_SELECT: {
5159 unsigned Size =
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
5161 AMDGPU::SGPRRegBankID);
5163 AMDGPU::SGPRRegBankID);
5164 bool SGPRSrcs = Op2Bank == AMDGPU::SGPRRegBankID &&
5165 Op3Bank == AMDGPU::SGPRRegBankID;
5167 unsigned CondBankDefault = SGPRSrcs ?
5168 AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5171 if (CondBank == AMDGPU::SGPRRegBankID)
5172 CondBank = SGPRSrcs ? AMDGPU::SGPRRegBankID : AMDGPU::VCCRegBankID;
5173 else if (CondBank == AMDGPU::VGPRRegBankID)
5174 CondBank = AMDGPU::VCCRegBankID;
5176 unsigned Bank = SGPRSrcs && CondBank == AMDGPU::SGPRRegBankID ?
5177 AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
5179 assert(CondBank == AMDGPU::VCCRegBankID || CondBank == AMDGPU::SGPRRegBankID);
5183 OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5184 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5185 OpdsMapping[2] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5186 OpdsMapping[3] = AMDGPU::getValueMappingSGPR64Only(Bank,
Size);
5188 OpdsMapping[0] = AMDGPU::getValueMapping(Bank,
Size);
5189 OpdsMapping[1] = AMDGPU::getValueMapping(CondBank, 1);
5190 OpdsMapping[2] = AMDGPU::getValueMapping(Bank,
Size);
5191 OpdsMapping[3] = AMDGPU::getValueMapping(Bank,
Size);
5197 case AMDGPU::G_SI_CALL: {
5198 OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
5204 for (
unsigned I = 4;
I <
MI.getNumOperands(); ++
I) {
5205 if (
MI.getOperand(
I).isReg()) {
5209 OpdsMapping[
I] = AMDGPU::getValueMapping(OpBank,
Size);
5214 case AMDGPU::G_LOAD:
5215 case AMDGPU::G_ZEXTLOAD:
5216 case AMDGPU::G_SEXTLOAD:
5219 case AMDGPU::G_ATOMICRMW_XCHG:
5220 case AMDGPU::G_ATOMICRMW_ADD:
5221 case AMDGPU::G_ATOMICRMW_SUB:
5222 case AMDGPU::G_ATOMICRMW_AND:
5223 case AMDGPU::G_ATOMICRMW_OR:
5224 case AMDGPU::G_ATOMICRMW_XOR:
5225 case AMDGPU::G_ATOMICRMW_MAX:
5226 case AMDGPU::G_ATOMICRMW_MIN:
5227 case AMDGPU::G_ATOMICRMW_UMAX:
5228 case AMDGPU::G_ATOMICRMW_UMIN:
5229 case AMDGPU::G_ATOMICRMW_FADD:
5230 case AMDGPU::G_ATOMICRMW_FMIN:
5231 case AMDGPU::G_ATOMICRMW_FMAX:
5232 case AMDGPU::G_ATOMICRMW_UINC_WRAP:
5233 case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
5234 case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG: {
5240 case AMDGPU::G_ATOMIC_CMPXCHG: {
5247 case AMDGPU::G_BRCOND: {
5249 AMDGPU::SGPRRegBankID);
5250 assert(
MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits() == 1);
5251 if (Bank != AMDGPU::SGPRRegBankID)
5252 Bank = AMDGPU::VCCRegBankID;
5254 OpdsMapping[0] = AMDGPU::getValueMapping(Bank, 1);
5257 case AMDGPU::G_FPTRUNC_ROUND_UPWARD:
5258 case AMDGPU::G_FPTRUNC_ROUND_DOWNWARD:
5260 case AMDGPU::G_PREFETCH:
5267 MI.getNumOperands());
unsigned const MachineRegisterInfo * MRI
static unsigned getIntrinsicID(const SDNode *N)
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
amdgpu AMDGPU Register Bank Select
static bool substituteSimpleCopyRegs(const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, unsigned OpIdx)
static unsigned regBankBoolUnion(unsigned RB0, unsigned RB1)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
static Register constrainRegToBank(MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &Reg, const RegisterBank &Bank)
static std::pair< Register, Register > unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode)
static void extendLow32IntoHigh32(MachineIRBuilder &B, Register Hi32Reg, Register Lo32Reg, unsigned ExtOpc, const RegisterBank &RegBank, bool IsBooleanSrc=false)
Implement extending a 32-bit value to a 64-bit value.
static unsigned getExtendOp(unsigned Opc)
static bool isVectorRegisterBank(const RegisterBank &Bank)
static unsigned regBankUnion(unsigned RB0, unsigned RB1)
static std::pair< LLT, LLT > splitUnequalType(LLT Ty, unsigned FirstSize)
Split Ty into 2 pieces.
static void setRegsToType(MachineRegisterInfo &MRI, ArrayRef< Register > Regs, LLT NewTy)
Replace the current type each register in Regs has with NewTy.
static void reinsertVectorIndexAdd(MachineIRBuilder &B, MachineInstr &IdxUseInstr, unsigned OpIdx, unsigned ConstOffset)
Utility function for pushing dynamic vector indexes with a constant offset into waterfall loops.
static LLT widen96To128(LLT Ty)
static LLT getHalfSizedType(LLT Ty)
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
AMD GCN specific subclass of TargetSubtarget.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic operations.
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
mir Rename Register Operands
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Interface definition for SIRegisterInfo.
static bool isUniformMMO(const MachineMemOperand *MMO)
bool applyMappingDynStackAlloc(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
std::pair< Register, unsigned > splitBufferOffsets(MachineIRBuilder &B, Register Offset) const
bool collectWaterfallOperands(SmallSet< Register, 4 > &SGPROperandRegs, MachineInstr &MI, MachineRegisterInfo &MRI, ArrayRef< unsigned > OpIndices) const
const InstructionMapping & getImageMapping(const MachineRegisterInfo &MRI, const MachineInstr &MI, int RsrcIdx) const
InstructionMappings addMappingFromTable(const MachineInstr &MI, const MachineRegisterInfo &MRI, const std::array< unsigned, NumOps > RegSrcOpIdx, ArrayRef< OpRegBankEntry< NumOps > > Table) const
unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const override
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsicWSideEffects(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool buildVCopy(MachineIRBuilder &B, Register DstReg, Register SrcReg) const
bool executeInWaterfallLoop(MachineIRBuilder &B, iterator_range< MachineBasicBlock::iterator > Range, SmallSet< Register, 4 > &SGPROperandRegs) const
Legalize instruction MI where operands in OpIndices must be SGPRs.
const RegisterBank & getRegBankFromRegClass(const TargetRegisterClass &RC, LLT) const override
Get a register bank that covers RC.
AMDGPURegisterBankInfo(const GCNSubtarget &STI)
bool applyMappingMAD_64_32(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
unsigned getRegBankID(Register Reg, const MachineRegisterInfo &MRI, unsigned Default=AMDGPU::VGPRRegBankID) const
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Reg) const
Handle register layout difference for f16 images for some subtargets.
const RegisterBankInfo::InstructionMapping & getInstrMappingForLoad(const MachineInstr &MI) const
void applyMappingImpl(MachineIRBuilder &Builder, const OperandsMapper &OpdMapper) const override
See RegisterBankInfo::applyMapping.
bool applyMappingBFE(MachineIRBuilder &B, const OperandsMapper &OpdMapper, bool Signed) const
bool applyMappingImage(MachineIRBuilder &B, MachineInstr &MI, const OperandsMapper &OpdMapper, int RSrcIdx) const
const ValueMapping * getVGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool isScalarLoadLegal(const MachineInstr &MI) const
unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg, Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const
const ValueMapping * getSGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
bool applyMappingLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper, MachineInstr &MI) const
void split64BitValueForMapping(MachineIRBuilder &B, SmallVector< Register, 2 > &Regs, LLT HalfTy, Register Reg) const
Split 64-bit value Reg into two 32-bit halves and populate them into Regs.
const ValueMapping * getValueMappingForPtr(const MachineRegisterInfo &MRI, Register Ptr) const
Return the mapping for a pointer argument.
unsigned getMappingType(const MachineRegisterInfo &MRI, const MachineInstr &MI) const
RegisterBankInfo::InstructionMappings getInstrAlternativeMappingsIntrinsic(const MachineInstr &MI, const MachineRegisterInfo &MRI) const
bool isDivergentRegBank(const RegisterBank *RB) const override
Returns true if the register bank is considered divergent.
void constrainOpWithReadfirstlane(MachineIRBuilder &B, MachineInstr &MI, unsigned OpIdx) const
InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const override
Get the alternative mappings for MI.
const InstructionMapping & getDefaultMappingSOP(const MachineInstr &MI) const
const InstructionMapping & getDefaultMappingAllVGPR(const MachineInstr &MI) const
const InstructionMapping & getInstrMapping(const MachineInstr &MI) const override
This function must return a legal mapping, because AMDGPURegisterBankInfo::getInstrAlternativeMapping...
unsigned getBreakDownCost(const ValueMapping &ValMapping, const RegisterBank *CurBank=nullptr) const override
Get the cost of using ValMapping to decompose a register.
const ValueMapping * getAGPROpMapping(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
const GCNSubtarget & Subtarget
const InstructionMapping & getDefaultMappingVOP(const MachineInstr &MI) const
bool isSALUMapping(const MachineInstr &MI) const
Register buildReadFirstLane(MachineIRBuilder &B, MachineRegisterInfo &MRI, Register Src) const
bool applyMappingSBufferLoad(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
void applyMappingSMULU64(MachineIRBuilder &B, const OperandsMapper &OpdMapper) const
const SIRegisterInfo * TRI
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool hasScalarCompareEq64() const
bool hasScalarSubwordLoads() const
bool hasFullRate64Ops() const
bool hasScalarDwordx3Loads() const
bool hasScalarMulHiInsts() const
bool hasPseudoScalarTrans() const
bool useFlatForGlobal() const
Generation getGeneration() const
bool hasUnpackedD16VMem() const
bool hasSALUFloatInsts() const
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void createdInstr(MachineInstr &MI)=0
An instruction has been created and inserted into the function.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT getScalarType() const
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
constexpr LLT divide(int Factor) const
Return a type that is Factor times smaller.
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar type as the original but with fewer elements.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addition can be safely performed at 32-bits precision by zero-extending the inputs).
TypeSize getValue() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor blocks to refer to this block instead of FromMBB.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Helper class to build MachineInstr.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
MachineInstrSpan provides an interface to get an iteration range containing the instruction it was initialized with, along with all those that were inserted after it.
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
Representation of each machine instruction.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
bool isAtomic() const
Returns true if this operation has an atomic ordering requirement of unordered or higher,...
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Helper class that represents how the value of an instruction may be mapped and what is the related cost of such mapping.
bool isValid() const
Check whether this object is valid.
Helper class used to get/create the virtual registers that will be used to replace the MachineOperands when applying a mapping.
const InstructionMapping & getInstrMapping() const
The final mapping of the instruction.
MachineInstr & getMI() const
MachineRegisterInfo & getMRI() const
The MachineRegisterInfo we used to realize the mapping.
iterator_range< SmallVectorImpl< Register >::const_iterator > getVRegs(unsigned OpIdx, bool ForDebug=false) const
Get all the virtual registers required to map the OpIdx-th operand of the instruction.
virtual InstructionMappings getInstrAlternativeMappings(const MachineInstr &MI) const
Get the alternative mappings for MI.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const InstructionMapping & getInstructionMapping(unsigned ID, unsigned Cost, const ValueMapping *OperandsMapping, unsigned NumOperands) const
Method to get a uniquely generated InstructionMapping.
static void applyDefaultMapping(const OperandsMapper &OpdMapper)
Helper method to apply something that is like the default mapping.
const ValueMapping & getValueMapping(unsigned StartIdx, unsigned Length, const RegisterBank &RegBank) const
The most common ValueMapping consists of a single PartialMapping.
const InstructionMapping & getInvalidInstructionMapping() const
Method to get a uniquely generated invalid InstructionMapping.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
const unsigned * Sizes
Hold the sizes of the register banks for all HwModes.
bool cannotCopy(const RegisterBank &Dst, const RegisterBank &Src, TypeSize Size) const
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
const ValueMapping * getOperandsMapping(Iterator Begin, Iterator End) const
Get the uniquely generated array of ValueMapping for the elements of between Begin and End.
virtual unsigned copyCost(const RegisterBank &A, const RegisterBank &B, TypeSize Size) const
Get the cost of a copy from B to A, or put differently, get the cost of A = COPY B.
const InstructionMapping & getInstrMappingImpl(const MachineInstr &MI) const
Try to get the mapping of MI.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset, Align Alignment=Align(4)) const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
const TargetRegisterClass * getWaveMaskRegClass() const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
static bool shouldExpandVectorDynExt(unsigned EltSize, unsigned NumElem, bool IsDivergentIdx, const GCNSubtarget *Subtarget)
Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be expanded into a set of cmp/select instructions.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static IntegerType * getInt32Ty(LLVMContext &C)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
self_iterator getIterator()
A range adaptor for a pair of iterators.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
bool isFlatGlobalAddrSpace(unsigned AS)
bool isExtendedGlobalAddrSpace(unsigned AS)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
const RsrcIntrinsic * lookupRsrcIntrinsic(unsigned Intr)
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelKnownBits *KnownBits=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
operand_type_match m_Reg()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
@ Kill
The last use of a register.
This is an optimization pass for GlobalISel generic memory operations.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
static const MachineMemOperand::Flags MONoClobber
Mark the MMO of a uniform load if there are no potentially clobbering stores on any path from the sta...
auto reverse(ContainerTy &&C)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
@ Default
The result values are uniform if and only if all operands are uniform.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
unsigned StartIdx
Number of bits at which this partial mapping starts in the original value.
const RegisterBank * RegBank
Register bank where the partial value lives.
unsigned Length
Length of this mapping in bits.
Helper struct that represents how a value is mapped through different register banks.
unsigned NumBreakDowns
Number of partial mapping to break down this value.
const PartialMapping * BreakDown
How the value is broken down between the different register banks.
The llvm::once_flag structure.