#define DEBUG_TYPE "si-fold-operands"

bool Commuted_ = false,
UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
if (FoldOp->isImm()) {
  ImmToFold = FoldOp->getImm();
} else if (FoldOp->isFI()) {
  FrameIndexToFold = FoldOp->getIndex();

bool needsShrink() const { return ShrinkOpcode != -1; }
getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

"SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

TRI.getSubRegisterClass(RC, MO.getSubReg()))
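// macToMad: map the two-address MAC/FMAC opcodes to their three-address
// MAD/FMA equivalents; returns INSTRUCTION_LIST_END when no mapping exists.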
case AMDGPU::V_MAC_F32_e64:
  return AMDGPU::V_MAD_F32_e64;
case AMDGPU::V_MAC_F16_e64:
  return AMDGPU::V_MAD_F16_e64;
case AMDGPU::V_FMAC_F32_e64:
  return AMDGPU::V_FMA_F32_e64;
case AMDGPU::V_FMAC_F16_e64:
  return AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_F16_t16_e64:
  return AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_LEGACY_F32_e64:
  return AMDGPU::V_FMA_LEGACY_F32_e64;
case AMDGPU::V_FMAC_F64_e64:
  return AMDGPU::V_FMA_F64_e64;
return AMDGPU::INSTRUCTION_LIST_END;
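// frameIndexMayFold: a frame index is only worth folding into the address
// operand of the use (vaddr, or vaddr with no saddr for flat scratch).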
if (!OpToFold.isFI())
const unsigned Opc = UseMI.getOpcode();
return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
return OpNo == VIdx && SIdx == -1;

return new SIFoldOperands();
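// updateOperand: rewrite the use operand in place once a fold candidate has
// been accepted, including the packed 16-bit (V2*16) and shrink-to-VOP2 cases.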
bool SIFoldOperands::updateOperand(FoldCandidate &Fold) const {
  AMDGPU::isFoldableLiteralV216(Fold.ImmToFold,
                                ST->hasInv2PiInlineImm())) {
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0))
    ModIdx = AMDGPU::OpName::src0_modifiers;
  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1))
    ModIdx = AMDGPU::OpName::src1_modifiers;
  else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2))
    ModIdx = AMDGPU::OpName::src2_modifiers;
  ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModIdx);
  unsigned Val = Mod.getImm();
  switch (TII->get(Opcode).operands()[OpNo].OperandType) {
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    if (!isUInt<16>(Fold.ImmToFold)) {
      if (!(Fold.ImmToFold & 0xffff)) {

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    int Op32 = Fold.ShrinkOpcode;
    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());
    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);
    if (HaveNonDbgCarryUse) {
    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));
    TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");
  int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
  if (NewMFMAOpc == -1)
  MI->setDesc(TII->get(NewMFMAOpc));
  MI->untieRegOperand(0);
  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
                   Fold.OpToFold->getTargetFlags());
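// isUseMIInFoldList / appendFoldCandidate: bookkeeping for the pending fold
// list; at most one candidate is kept per (UseMI, UseOpNo) pair.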
return any_of(FoldList, [&](const auto &C) { return C.UseMI == MI; });

for (FoldCandidate &Fold : FoldList)
  if (Fold.UseMI == MI && Fold.UseOpNo == OpNo)

LLVM_DEBUG(dbgs() << "Append " << (Commuted ? "commuted" : "normal")
                  << " operand " << OpNo << "\n " << *MI);
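// tryAddToFoldList: when the operand is not legal in its current position,
// try converting MAC/FMAC to MAD/FMA, S_SETREG to its IMM32 form, or
// commuting the instruction before giving up.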
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
  unsigned Opc = MI->getOpcode();
  if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
    MI->setDesc(TII->get(NewOpc));
    bool AddOpSel = !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) &&
                    AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel);
    bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
    MI->untieRegOperand(OpNo);
    MI->removeOperand(MI->getNumExplicitOperands() - 1);
    MI->setDesc(TII->get(Opc));
  if (OpToFold->isImm()) {
    if (Opc == AMDGPU::S_SETREG_B32)
      ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
    else if (Opc == AMDGPU::S_SETREG_B32_mode)
      ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
    MI->setDesc(TII->get(ImmOpc));

  unsigned CommuteOpNo = OpNo;
  bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
  if (CommuteIdx0 == OpNo)
    CommuteOpNo = CommuteIdx1;
  else if (CommuteIdx1 == OpNo)
    CommuteOpNo = CommuteIdx0;
  if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
                     !MI->getOperand(CommuteIdx1).isReg()))
      !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
  if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
    if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
         Opc == AMDGPU::V_SUB_CO_U32_e64 ||
         Opc == AMDGPU::V_SUBREV_CO_U32_e64) &&
    unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
    if (!OtherOp.isReg() ||
    unsigned MaybeCommutedOpc = MI->getOpcode();
    int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
    TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);

if (TII->isSALU(MI->getOpcode())) {
  if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
    for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
      auto &Op = MI->getOperand(i);
      if (OpNo != i && !Op.isReg() && !TII->isInlineConstant(Op, OpInfo))

return !TII->isSDWA(MI);
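// getRegSeqInit: collect the (source operand, subregister index) pairs that
// initialize a REG_SEQUENCE, looking through foldable copies to immediates.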
bool SIFoldOperands::getRegSeqInit(
  if (!Def || !Def->isRegSequence())
  for (unsigned I = 1, E = Def->getNumExplicitOperands(); I < E; I += 2) {
       SubDef = MRI->getVRegDef(Sub->getReg())) {
      if (TII->isInlineConstant(*Op, OpTy))
      if (!Op->isReg() || Op->getReg().isPhysical())
    Defs.emplace_back(Sub, Def->getOperand(I + 1).getImm());
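// tryToFoldACImm: fold an inline immediate (or a register known to hold one)
// into an operand that accepts the REG_INLINE_AC / REG_INLINE_C encodings.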
bool SIFoldOperands::tryToFoldACImm(
  if ((OpTy < AMDGPU::OPERAND_REG_INLINE_AC_FIRST ||
       OpTy > AMDGPU::OPERAND_REG_INLINE_AC_LAST) &&
      (OpTy < AMDGPU::OPERAND_REG_INLINE_C_FIRST ||
       OpTy > AMDGPU::OPERAND_REG_INLINE_C_LAST))
  if (OpToFold.isImm() && TII->isInlineConstant(OpToFold, OpTy) &&
  if (!OpToFold.isReg())
  if (!UseOp.getSubReg() && Def && TII->isFoldableCopy(*Def)) {
    if (DefOp.isImm() && TII->isInlineConstant(DefOp, OpTy) &&
  if (!getRegSeqInit(Defs, UseReg, OpTy))
  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    auto SubImm = Op->getImm();
    if (!TII->isInlineConstant(*Op, OpTy) ||
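// foldOperand: the main per-use folding routine. It recurses through
// REG_SEQUENCE users, handles scratch-buffer and flat-scratch addresses,
// AGPR copies, and V_READFIRSTLANE/V_READLANE of immediates.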
void SIFoldOperands::foldOperand(
  if (!isUseSafeToFold(*UseMI, UseOp))
    if (RSUse.getSubReg() != RegSeqDstSubReg)
    foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(&RSUse), FoldList,
    if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=
        MFI->getScratchRSrcReg())
        *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);
      AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr) &&
      !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::saddr)) {
    unsigned NewOpc = AMDGPU::getFlatScratchInstSSfromSV(Opc);

  bool FoldingImmLike =
    if (TRI->isSGPRClass(SrcRC) && TRI->hasVectorRegisters(DestRC)) {
      for (auto &Use : MRI->use_nodbg_operands(DestReg)) {
        if (Use.isImplicit())
            Use.getParent()->getOperandNo(&Use),
      for (auto &F : CopyUses) {
        foldOperand(*F.OpToFold, F.UseMI, F.UseOpNo, FoldList,
    if (DestRC == &AMDGPU::AGPR_32RegClass &&
        TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
    unsigned MovOp = TII->getMovOpcode(DestRC);
    if (MovOp == AMDGPU::COPY)
    while (ImpOpI != ImpOpE) {
        getRegSeqInit(Defs, UseReg, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
      for (unsigned I = 0; I < Size / 4; ++I) {
            TII->isInlineConstant(*Def, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
          int64_t Imm = Def->getImm();
          auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                  TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addImm(Imm);
        } else if (Def->isReg() && TRI->isAGPR(*MRI, Def->getReg())) {
          Def->setIsKill(false);
          if (!SeenAGPRs.insert(Src)) {
          Def->setIsKill(false);
          if (TRI->isSGPRReg(*MRI, Src.Reg)) {
            auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
          if (CopyToVGPR.Reg) {
            if (VGPRCopies.count(CopyToVGPR)) {
              Vgpr = VGPRCopies[CopyToVGPR];
              Vgpr = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
              VGPRCopies[CopyToVGPR] = Vgpr;
            auto Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
                    TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp).addReg(Vgpr);
          B.addImm(Defs[I].second);
    else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
    else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
    if (FoldingImmLike) {
      if (OpToFold.isImm())

  if (!FoldingImmLike) {
    if (OpToFold.isReg() && ST->needsAlignedVGPRs()) {
      if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
      if (!RC || !TRI->isProperlyAlignedRC(*RC))
  if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
    if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
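// evalBinaryInstruction: constant-fold the 32-bit bitwise and shift opcodes
// below; shift amounts are masked with & 31 to match hardware behavior.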
case AMDGPU::V_AND_B32_e64:
case AMDGPU::V_AND_B32_e32:
case AMDGPU::S_AND_B32:
case AMDGPU::V_OR_B32_e64:
case AMDGPU::V_OR_B32_e32:
case AMDGPU::S_OR_B32:
case AMDGPU::V_XOR_B32_e64:
case AMDGPU::V_XOR_B32_e32:
case AMDGPU::S_XOR_B32:
case AMDGPU::S_XNOR_B32:
case AMDGPU::S_NAND_B32:
case AMDGPU::S_NOR_B32:
case AMDGPU::S_ANDN2_B32:
case AMDGPU::S_ORN2_B32:
case AMDGPU::V_LSHL_B32_e64:
case AMDGPU::V_LSHL_B32_e32:
case AMDGPU::S_LSHL_B32:
  Result = LHS << (RHS & 31);
case AMDGPU::V_LSHLREV_B32_e64:
case AMDGPU::V_LSHLREV_B32_e32:
  Result = RHS << (LHS & 31);
case AMDGPU::V_LSHR_B32_e64:
case AMDGPU::V_LSHR_B32_e32:
case AMDGPU::S_LSHR_B32:
  Result = LHS >> (RHS & 31);
case AMDGPU::V_LSHRREV_B32_e64:
case AMDGPU::V_LSHRREV_B32_e32:
  Result = RHS >> (LHS & 31);
case AMDGPU::V_ASHR_I32_e64:
case AMDGPU::V_ASHR_I32_e32:
case AMDGPU::S_ASHR_I32:
  Result = static_cast<int32_t>(LHS) >> (RHS & 31);
case AMDGPU::V_ASHRREV_I32_e64:
case AMDGPU::V_ASHRREV_I32_e32:
  Result = static_cast<int32_t>(RHS) >> (LHS & 31);
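// getMovOpc / mutateCopyOp / getImmOrMaterializedImm feed tryConstantFoldOp,
// which rewrites instructions with known-constant sources into moves and
// simplifies identities such as x | 0, x & -1, and x ^ 0.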
return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;

MI.setDesc(NewDesc);
for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
  MI.removeOperand(I);

SIFoldOperands::getImmOrMaterializedImm(MachineOperand &Op) const {
  if (!Op.isReg() || Op.getSubReg() != AMDGPU::NoSubRegister ||
      !Op.getReg().isVirtual())
  if (Def && Def->isMoveImmediate()) {

unsigned Opc = MI->getOpcode();
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
     Opc == AMDGPU::S_NOT_B32) &&
  MI->getOperand(1).ChangeToImmediate(~Src0->getImm());
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());
MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
MI->removeOperand(Src1Idx);
if (!MI->isCommutable())
int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
if (Opc == AMDGPU::V_OR_B32_e64 ||
    Opc == AMDGPU::V_OR_B32_e32 ||
    Opc == AMDGPU::S_OR_B32) {
  MI->removeOperand(Src1Idx);
} else if (Src1Val == -1) {
  MI->removeOperand(Src1Idx);
if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
    Opc == AMDGPU::S_AND_B32) {
  MI->removeOperand(Src0Idx);
} else if (Src1Val == -1) {
  MI->removeOperand(Src1Idx);
if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
    Opc == AMDGPU::S_XOR_B32) {
  MI->removeOperand(Src1Idx);
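// tryFoldCndMask: a V_CNDMASK whose two sources are identical immediates (and
// which carries no source modifiers) always yields that value, so the select
// and its mask operand can be dropped.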
unsigned Opc = MI.getOpcode();
if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
    Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
auto *Src0Imm = getImmOrMaterializedImm(*Src0);
auto *Src1Imm = getImmOrMaterializedImm(*Src1);
if (!Src1Imm->isIdenticalTo(*Src0Imm))
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
    AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
    (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
MI.removeOperand(Src2Idx);
MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
if (Src1ModIdx != -1)
  MI.removeOperand(Src1ModIdx);
if (Src0ModIdx != -1)
  MI.removeOperand(Src0ModIdx);
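// tryFoldZeroHighBits: a V_AND_B32 that masks to the low 16 bits is redundant
// when the defining instruction already zeroes the high half of its result.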
bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)
  if (!ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode()))
  MI.eraseFromParent();
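// foldInstOperand: gather fold candidates for every non-debug use of the
// copy's destination, apply them, and re-commute any use that was commuted
// speculatively but not folded.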
bool Changed = false;
if (OpToFold.isImm()) {
  if (tryConstantFoldOp(&UseMI)) {
for (auto &Use : MRI->use_nodbg_operands(Dst.getReg()))
for (auto *U : UsesToProcess) {
if (CopiesToReplace.empty() && FoldList.empty())
  Copy->addImplicitDefUseOperands(*MF);
for (FoldCandidate &Fold : FoldList) {
  assert(!Fold.isReg() || Fold.OpToFold);
  if (Fold.isReg() && Fold.OpToFold->getReg().isVirtual()) {
    assert(Fold.OpToFold && Fold.OpToFold->isReg());
    MRI->clearKillFlags(Fold.OpToFold->getReg());
            << static_cast<int>(Fold.UseOpNo) << " of "
  } else if (Fold.Commuted) {
    TII->commuteInstruction(*Fold.UseMI, false);
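// tryFoldFoldableCopy: besides driving foldInstOperand, this tracks the last
// value written to M0 so redundant M0 writes can be erased, and deletes
// copies that become dead after folding.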
bool SIFoldOperands::tryFoldFoldableCopy(
  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();
  if (!FoldingImm && !OpToFold.isReg())
  if (!MI.getOperand(0).getReg().isVirtual())
  bool Changed = foldInstOperand(MI, OpToFold);
  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
    InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();
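// tryFoldClamp: a clamping V_MAX (both sources the same register, clamp bit
// set, no omod) can be folded into the instruction that defines its source.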
unsigned Op = MI.getOpcode();
case AMDGPU::V_MAX_F32_e64:
case AMDGPU::V_MAX_F16_e64:
case AMDGPU::V_MAX_F16_t16_e64:
case AMDGPU::V_MAX_F64_e64:
case AMDGPU::V_PK_MAX_F16: {
  if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
      Src0->getSubReg() != AMDGPU::NoSubRegister)
  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
      = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
      = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();
  if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))
if (TII->getClampMask(*Def) != TII->getClampMask(MI))
LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
  Def->eraseFromParent();
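// getOModValue: recognize multiplications by 0.5, 2.0 and 4.0 (the 64-, 32-
// and 16-bit float bit patterns below) and map them to OMOD encodings.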
case AMDGPU::V_MUL_F64_e64: {
  case 0x3fe0000000000000:
  case 0x4000000000000000:
  case 0x4010000000000000:
case AMDGPU::V_MUL_F32_e64: {
  switch (static_cast<uint32_t>(Val)) {
case AMDGPU::V_MUL_F16_e64:
case AMDGPU::V_MUL_F16_t16_e64: {
  switch (static_cast<uint16_t>(Val)) {
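// isOMod: match a multiply or add by such a constant that can instead be
// expressed as an output modifier, provided no other modifiers are set.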
std::pair<const MachineOperand *, int>
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
          Op == AMDGPU::V_MUL_F16_t16_e64) &&
    if (Src0->isImm()) {
    } else if (Src1->isImm()) {
        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))
    return std::pair(RegOp, OMod);
  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
          Op == AMDGPU::V_ADD_F16_t16_e64) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
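// tryFoldOMod: apply the matched output modifier to the instruction that
// defines the source, provided that definition has no other users and no
// clamp already set, then erase the multiply/add.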
std::tie(RegOp, OMod) = isOMod(MI);
    RegOp->getSubReg() != AMDGPU::NoSubRegister ||
    !MRI->hasOneNonDBGUser(RegOp->getReg()))
if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
  Def->eraseFromParent();
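// tryFoldRegSequence (gfx90a+): rewrite a VGPR REG_SEQUENCE whose inputs are
// AGPRs into an equivalent AGPR REG_SEQUENCE when its single use accepts a
// vector super register class, removing the AGPR-to-VGPR copies.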
auto Reg = MI.getOperand(0).getReg();
if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
    !MRI->hasOneNonDBGUse(Reg))
for (auto &Def : Defs) {
  const auto *Op = Def.first;
  if (TRI->isAGPR(*MRI, Op->getReg()))
if (!TRI->isVGPR(*MRI, Reg) || !MRI->hasOneNonDBGUse(Reg))
Op = &*MRI->use_nodbg_begin(Reg);
if (Op->getSubReg())
    TII->getRegClass(InstDesc, OpIdx, TRI, *MI.getMF());
if (!OpRC || !TRI->isVectorSuperClass(OpRC))
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
auto Dst = MRI->createVirtualRegister(NewDstRC);
        TII->get(AMDGPU::REG_SEQUENCE), Dst);
for (unsigned I = 0; I < Defs.size(); ++I) {
  Def->setIsKill(false);
  RS.addImm(Defs[I].second);
if (!TII->isOperandLegal(*UseMI, OpIdx, Op)) {
  RS->eraseFromParent();
if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
  MI.eraseFromParent();
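// tryFoldPhiAGPR: turn a VGPR PHI whose incoming values are copies from AGPRs
// into an AGPR PHI, inserting V_ACCVGPR_WRITE or COPY instructions for the
// incoming values and a copy back to the original VGPR result.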
if (!TRI->isVGPR(*MRI, PhiOut))
for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
  if (!Copy || !Copy->isCopy())
for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
  bool UseAccVGPRWrite = false;
  if (Def->isCopy()) {
    if (!ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
      UseAccVGPRWrite = true;
    InsertPt = ++Def->getIterator();
    InsertMBB = Def->getParent();
  const unsigned CopyOpc =
      UseAccVGPRWrite ? AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::COPY;
  Register NewReg = MRI->createVirtualRegister(ARC);
          TII->get(CopyOpc), NewReg)
Register NewReg = MRI->createVirtualRegister(ARC);
PHI.getOperand(0).setReg(NewReg);
        TII->get(AMDGPU::COPY), PhiOut)
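// tryFoldLoad (gfx90a+): when a load's only users are copies or REG_SEQUENCEs
// that end up in AGPRs, switch the load's destination register class to the
// equivalent AGPR class and propagate that through the users.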
if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)
while (!Users.empty()) {
  if (!I->isCopy() && !I->isRegSequence())
  Register DstReg = I->getOperand(0).getReg();
  if (TRI->isAGPR(*MRI, DstReg))
    Users.push_back(&U);
MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
if (!TII->isOperandLegal(MI, 0, &Def)) {
  MRI->setRegClass(DefReg, RC);
while (!MoveRegs.empty()) {
  MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));
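// tryOptimizeAGPRPhis (pre-gfx90a): when one AGPR feeds several PHI operands,
// read it once into a VGPR (V_ACCVGPR_READ), copy that back to a fresh AGPR,
// and rewrite the PHI operands to use the copy.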
if (ST->hasGFX90AInsts())
for (auto &MI : MBB) {
  if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))
  for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
bool Changed = false;
for (const auto &[Entry, MOs] : RegToMO) {
  if (MOs.size() == 1)
      MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
          TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)
  Register TempAGPR = MRI->createVirtualRegister(ARC);
          TII->get(AMDGPU::COPY), TempAGPR)
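// runOnMachineFunction: walk every block and instruction, trying the folds
// above in turn, and invalidate the cached M0 value whenever M0 is redefined.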
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
bool HasNSZ = MFI->hasNoSignedZerosFPMath();
bool Changed = false;
Changed |= tryFoldCndMask(MI);
if (tryFoldZeroHighBits(MI)) {
if (MI.isRegSequence() && tryFoldRegSequence(MI)) {
if (MI.isPHI() && tryFoldPhiAGPR(MI)) {
if (MI.mayLoad() && tryFoldLoad(MI)) {
if (TII->isFoldableCopy(MI)) {
  Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);
if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
  CurrentKnownM0Val = nullptr;
Changed |= tryFoldClamp(MI);
Changed |= tryOptimizeAGPRPhis(*MBB);