#define DEBUG_TYPE "si-fold-operands"

  unsigned DefSubReg = AMDGPU::NoSubRegister;

  FoldableDef() = delete;

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {

      ImmToFold = FoldOp.getImm();
    } else if (FoldOp.isFI()) {
      FrameIndexToFold = FoldOp.getIndex();

              unsigned DefSubReg = AMDGPU::NoSubRegister)
      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),

    FoldableDef Copy(*this);
    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);

    return OpToFold->getReg();

  unsigned getSubReg() const {
    return OpToFold->getSubReg();

    return FrameIndexToFold;

  std::optional<int64_t> getEffectiveImmVal() const {

                      unsigned OpIdx) const {
      std::optional<int64_t> ImmToFold = getEffectiveImmVal();

      if (DefSubReg != AMDGPU::NoSubRegister)

      if (DefSubReg != AMDGPU::NoSubRegister)
      return TII.isOperandLegal(MI, OpIdx, OpToFold);

struct FoldCandidate {
                bool Commuted = false, int ShrinkOp = -1)
      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
        Commuted(Commuted) {}

  bool isFI() const { return Def.isFI(); }
    return Def.FrameIndexToFold;

  bool isImm() const { return Def.isImm(); }

  bool isReg() const { return Def.isReg(); }

  bool isGlobal() const { return Def.isGlobal(); }

  bool needsShrink() const { return ShrinkOpcode != -1; }

class SIFoldOperandsImpl {
                          const FoldableDef &OpToFold) const;

  unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
    case AMDGPU::S_ADD_I32: {
      if (ST->hasAddNoCarryInsts())
        return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
      return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
    case AMDGPU::S_OR_B32:
      return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
    case AMDGPU::S_AND_B32:
      return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
    case AMDGPU::S_MUL_I32:
      return AMDGPU::V_MUL_LO_U32_e64;
    return AMDGPU::INSTRUCTION_LIST_END;

  bool foldCopyToVGPROfScalarAddOfFrameIndex(Register DstReg, Register SrcReg,
                            int64_t ImmVal) const;
                            int64_t ImmVal) const;
                          const FoldableDef &OpToFold) const;

  getRegSeqInit(SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,

  std::pair<int64_t, const TargetRegisterClass *>

  bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
  bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;

  std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;

  SIFoldOperandsImpl() = default;

    return SIFoldOperandsImpl().run(MF);

  StringRef getPassName() const override { return "SI Fold Operands"; }

char SIFoldOperandsLegacy::ID = 0;
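// The fragments below come from two static helpers: getRegOpRC(), which
// resolves the register class of a register MachineOperand (narrowing it by
// the operand's subregister when one is present), and macToMad(), which maps
// a V_MAC/V_FMAC opcode to its three-address V_MAD/V_FMA form, or
// INSTRUCTION_LIST_END when no such form exists.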
             TRI.getSubRegisterClass(RC, MO.getSubReg()))

  case AMDGPU::V_MAC_F32_e64:
    return AMDGPU::V_MAD_F32_e64;
  case AMDGPU::V_MAC_F16_e64:
    return AMDGPU::V_MAD_F16_e64;
  case AMDGPU::V_FMAC_F32_e64:
    return AMDGPU::V_FMA_F32_e64;
  case AMDGPU::V_FMAC_F16_e64:
    return AMDGPU::V_FMA_F16_gfx9_e64;
  case AMDGPU::V_FMAC_F16_t16_e64:
    return AMDGPU::V_FMA_F16_gfx9_t16_e64;
  case AMDGPU::V_FMAC_F16_fake16_e64:
    return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
  case AMDGPU::V_FMAC_LEGACY_F32_e64:
    return AMDGPU::V_FMA_LEGACY_F32_e64;
  case AMDGPU::V_FMAC_F64_e64:
    return AMDGPU::V_FMA_F64_e64;
  return AMDGPU::INSTRUCTION_LIST_END;

                                            const FoldableDef &OpToFold) const {
  if (!OpToFold.isFI())

  const unsigned Opc = UseMI.getOpcode();
  case AMDGPU::S_ADD_I32:
  case AMDGPU::S_ADD_U32:
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
    return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:
    return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&

    return OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);

  int SIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
  int VIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
  return OpNo == VIdx && SIdx == -1;

bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
  if (TRI->isVGPR(*MRI, DstReg) && TRI->isSGPRReg(*MRI, SrcReg) &&
      MRI->hasOneNonDBGUse(SrcReg)) {
    MachineInstr *Def = MRI->getVRegDef(SrcReg);
    if (!Def || Def->getNumOperands() != 4)

    MachineOperand *Src0 = &Def->getOperand(1);
    MachineOperand *Src1 = &Def->getOperand(2);

    const bool UseVOP3 = !Src0->isImm() || TII->isInlineConstant(*Src0);
    unsigned NewOp = convertToVALUOp(Def->getOpcode(), UseVOP3);
    if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
        !Def->getOperand(3).isDead())

    MachineBasicBlock *MBB = Def->getParent();
    if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
      MachineInstrBuilder Add =
      if (Add->getDesc().getNumDefs() == 2) {
        Register CarryOutReg = MRI->createVirtualRegister(TRI->getBoolRC());
        Add.addDef(CarryOutReg, RegState::Dead);
        MRI->setRegAllocationHint(CarryOutReg, 0, TRI->getVCC());

      Add.add(*Src0).add(*Src1).setMIFlags(Def->getFlags());

      Def->eraseFromParent();
      MI.eraseFromParent();

    assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);

    Def->eraseFromParent();
    MI.eraseFromParent();

  return new SIFoldOperandsLegacy();
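// canUseImmWithOpSel() / tryFoldImmWithOpSel(): for instructions with packed
// 16-bit operands, check whether an immediate can be encoded through op_sel
// modifiers and rewrite the operand accordingly, preferring inline-constant
// encodings of the low/high halves and, for V_PK_ADD_U16 / V_PK_SUB_U16,
// retrying with the negated immediate and the opposite opcode.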
bool SIFoldOperandsImpl::canUseImmWithOpSel(const MachineInstr *MI,
                                            int64_t ImmVal) const {
  const uint64_t TSFlags = MI->getDesc().TSFlags;

  int OpNo = MI->getOperandNo(&Old);
  unsigned Opcode = MI->getOpcode();
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *MI, unsigned UseOpNo,
                                             int64_t ImmVal) const {
  MachineOperand &Old = MI->getOperand(UseOpNo);
  unsigned Opcode = MI->getOpcode();
  int OpNo = MI->getOperandNo(&Old);
  uint8_t OpType = TII->get(Opcode).operands()[OpNo].OperandType;

  AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
  unsigned SrcIdx = ~0;
  if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
    ModName = AMDGPU::OpName::src0_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
    ModName = AMDGPU::OpName::src1_modifiers;
  } else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
    ModName = AMDGPU::OpName::src2_modifiers;
  assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
  MachineOperand &Mod = MI->getOperand(ModIdx);
  unsigned ModVal = Mod.getImm();

  uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;

  auto tryFoldToInline = [&](uint32_t Imm) -> bool {
    uint16_t Lo = static_cast<uint16_t>(Imm);
    uint16_t Hi = static_cast<uint16_t>(Imm >> 16);
      Mod.setImm(NewModVal);

      if (static_cast<int16_t>(Lo) < 0) {
        int32_t SExt = static_cast<int16_t>(Lo);
        Mod.setImm(NewModVal);

      uint32_t Swapped = (static_cast<uint32_t>(Lo) << 16) | Hi;

  if (tryFoldToInline(Imm))

  bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
  bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
  if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
        AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
    bool Clamp = MI->getOperand(ClampIdx).getImm() != 0;

      uint16_t NegLo = -static_cast<uint16_t>(Imm);
      uint16_t NegHi = -static_cast<uint16_t>(Imm >> 16);
      uint32_t NegImm = (static_cast<uint32_t>(NegHi) << 16) | NegLo;

      if (tryFoldToInline(NegImm)) {
            IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
        MI->setDesc(TII->get(NegOpcode));
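// updateOperand() applies a queued FoldCandidate to its use instruction:
// immediates, frame indexes, and globals replace the operand directly (with
// optional shrinking to the 32-bit encoding via buildShrunkInst, or switching
// an MFMA to its early-clobber form), while register folds may require
// constraining the source register's class to the operand's class.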
bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);

  std::optional<int64_t> ImmVal;
    ImmVal = Fold.Def.getEffectiveImmVal();

  if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
    if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
    MachineBasicBlock *MBB = MI->getParent();

    int Op32 = Fold.ShrinkOpcode;
    MachineOperand &Dst0 = MI->getOperand(0);
    MachineOperand &Dst1 = MI->getOperand(1);

    bool HaveNonDbgCarryUse = !MRI->use_nodbg_empty(Dst1.getReg());

    const TargetRegisterClass *Dst0RC = MRI->getRegClass(Dst0.getReg());
    Register NewReg0 = MRI->createVirtualRegister(Dst0RC);

    MachineInstr *Inst32 = TII->buildShrunkInst(*MI, Op32);

    if (HaveNonDbgCarryUse) {
          .addReg(AMDGPU::VCC, RegState::Kill);

    for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
      MI->removeOperand(I);
    MI->setDesc(TII->get(AMDGPU::IMPLICIT_DEF));

      TII->commuteInstruction(*Inst32, false);

  assert(!Fold.needsShrink() && "not handled");

    if (NewMFMAOpc == -1)
    MI->setDesc(TII->get(NewMFMAOpc));
    MI->untieRegOperand(0);
    const MCInstrDesc &MCID = MI->getDesc();
    for (unsigned I = 0; I < MI->getNumDefs(); ++I)
        MI->getOperand(I).setIsEarlyClobber(true);

    int OpNo = MI->getOperandNo(&Old);
    if (!TII->isOperandLegal(*MI, OpNo, &New))

  if (Fold.isGlobal()) {
    Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
                   Fold.Def.OpToFold->getOffset(),
                   Fold.Def.OpToFold->getTargetFlags());

  MachineOperand *New = Fold.Def.OpToFold;

  if (const TargetRegisterClass *OpRC =
          TII->getRegClass(MI->getDesc(), Fold.UseOpNo)) {
    const TargetRegisterClass *NewRC =
        TRI->getRegClassForReg(*MRI, New->getReg());

    const TargetRegisterClass *ConstrainRC = OpRC;
    if (New->getSubReg()) {
          TRI->getMatchingSuperRegClass(NewRC, OpRC, New->getSubReg());

    if (New->getReg().isVirtual() &&
        !MRI->constrainRegClass(New->getReg(), ConstrainRC)) {
                 << TRI->getRegClassName(ConstrainRC) << '\n');

    if (New->getReg().isPhysical()) {

                                FoldCandidate &&Entry) {
  for (FoldCandidate &Fold : FoldList)
    if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
  LLVM_DEBUG(dbgs() << "Append " << (Entry.Commuted ? "commuted" : "normal")
                    << " operand " << Entry.UseOpNo << "\n " << *Entry.UseMI);

                                const FoldableDef &FoldOp,
                                bool Commuted = false, int ShrinkOp = -1) {
      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));

  if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())

    const FoldableDef &OpToFold) {
  assert(OpToFold.isImm() && "Expected immediate operand");
  uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
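// tryAddToFoldList() records a FoldCandidate for MI's operand OpNo, rewriting
// the instruction when that makes the fold legal: MAC/FMAC -> MAD/FMA via
// macToMad(), S_FMAC_F32 -> S_FMAAK/S_FMAMK, S_SETREG -> its IMM32 form, or
// commuting the operands and retrying.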
bool SIFoldOperandsImpl::tryAddToFoldList(
    SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
    const FoldableDef &OpToFold) const {
  const unsigned Opc = MI->getOpcode();

  auto tryToFoldAsFMAAKorMK = [&]() {
    if (!OpToFold.isImm())

    const bool TryAK = OpNo == 3;
    const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
    MI->setDesc(TII->get(NewOpc));

    bool FoldAsFMAAKorMK =
        tryAddToFoldList(FoldList, MI, TryAK ? 3 : 2, OpToFold);
    if (FoldAsFMAAKorMK) {
        MI->untieRegOperand(3);

      MachineOperand &Op1 = MI->getOperand(1);
      MachineOperand &Op2 = MI->getOperand(2);

  bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
  if (!IsLegal && OpToFold.isImm()) {
    if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
      IsLegal = canUseImmWithOpSel(MI, OpNo, *ImmVal);

    if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
      MI->setDesc(TII->get(NewOpc));

      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
        MI->untieRegOperand(OpNo);
      MI->removeOperand(MI->getNumExplicitOperands() - 1);

    if (Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
      if (tryToFoldAsFMAAKorMK())

    if (OpToFold.isImm()) {
      if (Opc == AMDGPU::S_SETREG_B32)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
      else if (Opc == AMDGPU::S_SETREG_B32_mode)
        ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
        MI->setDesc(TII->get(ImmOpc));

    bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);

      MachineOperand &Op = MI->getOperand(OpNo);
      MachineOperand &CommutedOp = MI->getOperand(CommuteOpNo);

      if (!Op.isReg() || !CommutedOp.isReg())

      if (Op.isReg() && CommutedOp.isReg() &&
          (Op.getReg() == CommutedOp.getReg() &&

    if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))

    if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
      if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
           Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
          (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
        TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);

      MachineOperand &OtherOp = MI->getOperand(OpNo);
      if (!OtherOp.isReg() ||

    unsigned MaybeCommutedOpc = MI->getOpcode();

  if (Opc == AMDGPU::S_FMAC_F32 &&
      (OpNo != 1 || !MI->getOperand(1).isIdenticalTo(MI->getOperand(2)))) {
    if (tryToFoldAsFMAAKorMK())

  if (OpToFold.isImm() &&
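// Helpers for REG_SEQUENCE sources: getRegSeqInit() collects the (operand,
// subregister index) pairs feeding a REG_SEQUENCE, isRegSeqSplat() detects
// when every input is the same immediate (also trying to match a 64-bit
// splat), and tryFoldRegSeqSplat() / tryToFoldACImm() check whether that
// splat immediate is legal for a particular use operand.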
bool SIFoldOperandsImpl::isUseSafeToFold(const MachineInstr &MI,
                                         const MachineOperand &UseMO) const {
  return !TII->isSDWA(MI);

       SubDef && TII.isFoldableCopy(*SubDef);
       SubDef = MRI.getVRegDef(Sub->getReg())) {
    unsigned SrcIdx = TII.getFoldableCopySrcIdx(*SubDef);
    if (SrcOp.getSubReg())

const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
    MachineInstr &RegSeq,
    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs) const {

  const TargetRegisterClass *RC = nullptr;

    else if (!TRI->getCommonSubClass(RC, OpRC))

      Defs.emplace_back(&SrcOp, SubRegIdx);

    if (DefSrc && (DefSrc->isReg() || DefSrc->isImm())) {
      Defs.emplace_back(DefSrc, SubRegIdx);

    Defs.emplace_back(&SrcOp, SubRegIdx);

const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
    SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
  if (!Def || !Def->isRegSequence())
  return getRegSeqInit(*Def, Defs);

std::pair<int64_t, const TargetRegisterClass *>
SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const {
  const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);

  bool TryToMatchSplat64 = false;

  for (unsigned I = 0, E = Defs.size(); I != E; ++I) {
    const MachineOperand *Op = Defs[I].first;
    int64_t SubImm = Op->getImm();
    if (Imm != SubImm) {
      if (I == 1 && (E & 1) == 0) {
        TryToMatchSplat64 = true;

  if (!TryToMatchSplat64)
    return {Defs[0].first->getImm(), SrcRC};

  for (unsigned I = 0, E = Defs.size(); I != E; I += 2) {
    const MachineOperand *Op0 = Defs[I].first;
    const MachineOperand *Op1 = Defs[I + 1].first;

    unsigned SubReg0 = Defs[I].second;
    unsigned SubReg1 = Defs[I + 1].second;

    if (TRI->getChannelFromSubReg(SubReg0) + 1 !=
        TRI->getChannelFromSubReg(SubReg1))

      SplatVal64 = MergedVal;
    else if (SplatVal64 != MergedVal)

  const TargetRegisterClass *RC64 = TRI->getSubRegisterClass(

  return {SplatVal64, RC64};

bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
    MachineInstr *UseMI, unsigned UseOpIdx, int64_t SplatVal,
    const TargetRegisterClass *SplatRC) const {
  if (UseOpIdx >= Desc.getNumOperands())

  int16_t RCID = TII->getOpRegClassID(Desc.operands()[UseOpIdx]);
  const TargetRegisterClass *OpRC = TRI->getRegClass(RCID);

  if (SplatVal != 0 && SplatVal != -1) {
    uint8_t OpTy = Desc.operands()[UseOpIdx].OperandType;
      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
      OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);

    if (!TRI->getCommonSubClass(OpRC, SplatRC))

  if (!TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp))

bool SIFoldOperandsImpl::tryToFoldACImm(
    const FoldableDef &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList) const {
  if (UseOpIdx >= Desc.getNumOperands())

  if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
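// foldOperand() is the central routine: it folds OpToFold into one use,
// recursing through REG_SEQUENCE users, folding frame indexes into scratch
// addressing where frameIndexMayFold() allows it, rewriting foldable copies
// into an appropriate mov opcode, and otherwise deferring to
// tryAddToFoldList().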
void SIFoldOperandsImpl::foldOperand(
    FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
    SmallVectorImpl<FoldCandidate> &FoldList,
    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {

  if (!isUseSafeToFold(*UseMI, *UseOp))

  if (UseOp->isReg() && OpToFold.isReg()) {
    if (UseOp->getSubReg() != AMDGPU::NoSubRegister &&
         !TRI->isSGPRReg(*MRI, OpToFold.getReg())))

    const TargetRegisterClass *SplatRC;
    std::tie(SplatVal, SplatRC) = isRegSeqSplat(*UseMI);

    for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
      MachineOperand *RSUse = UsesToProcess[I];
      MachineInstr *RSUseMI = RSUse->getParent();

        if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
          FoldableDef SplatDef(SplatVal, SplatRC);

      if (RSUse->getSubReg() != RegSeqDstSubReg)

      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,

  if (tryToFoldACImm(OpToFold, UseMI, UseOpIdx, FoldList))

  if (frameIndexMayFold(*UseMI, UseOpIdx, OpToFold)) {
      if (TII->getNamedOperand(*UseMI, AMDGPU::OpName::srsrc)->getReg() !=

      MachineOperand &SOff =
          *TII->getNamedOperand(*UseMI, AMDGPU::OpName::soffset);

          TII->getNamedOperand(*UseMI, AMDGPU::OpName::cpol)->getImm();

  bool FoldingImmLike =
      OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();

    const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);

    const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);

      for (unsigned MovOp :
           {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
            AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
            AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
            AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
        const MCInstrDesc &MovDesc = TII->get(MovOp);
        const TargetRegisterClass *MovDstRC =

        const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
        int16_t RegClassID = TII->getOpRegClassID(MovDesc.operands()[SrcIdx]);
        if (RegClassID != -1) {
          const TargetRegisterClass *MovSrcRC = TRI->getRegClass(RegClassID);
            MovSrcRC = TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);

          if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
              (!OpToFold.isImm() ||
               !TII->isImmOperandLegal(MovDesc, SrcIdx,
                                       *OpToFold.getEffectiveImmVal())))

          if (!MRI->constrainRegClass(SrcReg, MovSrcRC))

        if (!OpToFold.isImm() ||
            !TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))

      while (ImpOpI != ImpOpE) {

      if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
        MachineOperand NewSrcOp(SrcOp);

    LLVM_DEBUG(dbgs() << "Folding " << OpToFold.OpToFold << "\n into "

      unsigned SubRegIdx = OpToFold.getSubReg();
      static_assert(AMDGPU::sub1_hi16 == 12, "Subregister layout has changed");

      if (SubRegIdx > AMDGPU::sub1) {
        LaneBitmask M = TRI->getSubRegIndexLaneMask(SubRegIdx);
        M |= M.getLane(M.getHighestLane() - 1);
        SmallVector<unsigned, 4> Indexes;
        TRI->getCoveringSubRegIndexes(TRI->getRegClassForReg(*MRI, UseReg), M,
        assert(Indexes.size() == 1 && "Expected one 32-bit subreg to cover");
        SubRegIdx = Indexes[0];
      } else if (TII->getOpSize(*UseMI, 1) == 4)
        SubRegIdx = AMDGPU::sub0;

    OpToFold.OpToFold->setIsKill(false);

    if (foldCopyToAGPRRegSequence(UseMI))

  if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
      (UseOpc == AMDGPU::V_READLANE_B32 &&
           AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {

    if (FoldingImmLike) {
              *OpToFold.DefMI, *UseMI))

      if (OpToFold.isImm()) {
            *OpToFold.getEffectiveImmVal());
      } else if (OpToFold.isFI())
        assert(OpToFold.isGlobal());
                        OpToFold.OpToFold->getOffset(),
                        OpToFold.OpToFold->getTargetFlags());

    if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
              *OpToFold.DefMI, *UseMI))

      UseDesc.operands()[UseOpIdx].RegClass == -1)

  tryAddToFoldList(FoldList, UseMI, UseOpIdx, OpToFold);
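// Body of evalBinaryInstruction(Opcode, Result, LHS, RHS): constant-folds
// 32-bit bitwise and shift opcodes into Result. getMovOpc() then picks the
// scalar or vector 32-bit move opcode.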
  case AMDGPU::V_AND_B32_e64:
  case AMDGPU::V_AND_B32_e32:
  case AMDGPU::S_AND_B32:
  case AMDGPU::V_OR_B32_e64:
  case AMDGPU::V_OR_B32_e32:
  case AMDGPU::S_OR_B32:
  case AMDGPU::V_XOR_B32_e64:
  case AMDGPU::V_XOR_B32_e32:
  case AMDGPU::S_XOR_B32:
  case AMDGPU::S_XNOR_B32:
  case AMDGPU::S_NAND_B32:
  case AMDGPU::S_NOR_B32:
  case AMDGPU::S_ANDN2_B32:
  case AMDGPU::S_ORN2_B32:
  case AMDGPU::V_LSHL_B32_e64:
  case AMDGPU::V_LSHL_B32_e32:
  case AMDGPU::S_LSHL_B32:
    Result = LHS << (RHS & 31);
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32:
    Result = RHS << (LHS & 31);
  case AMDGPU::V_LSHR_B32_e64:
  case AMDGPU::V_LSHR_B32_e32:
  case AMDGPU::S_LSHR_B32:
    Result = LHS >> (RHS & 31);
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32:
    Result = RHS >> (LHS & 31);
  case AMDGPU::V_ASHR_I32_e64:
  case AMDGPU::V_ASHR_I32_e32:
  case AMDGPU::S_ASHR_I32:
    Result = static_cast<int32_t>(LHS) >> (RHS & 31);
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32:
    Result = static_cast<int32_t>(RHS) >> (LHS & 31);

  return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
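// tryConstantFoldOp() folds instructions whose sources are known immediates:
// NOT of a constant, OR/AND/XOR identities (x|0, x|-1, x&0, x&-1, x^0), and
// the fully-constant case evaluated through evalBinaryInstruction().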
bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *MI) const {
  if (!MI->allImplicitDefsAreDead())

  unsigned Opc = MI->getOpcode();

  int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);

  MachineOperand *Src0 = &MI->getOperand(Src0Idx);
  std::optional<int64_t> Src0Imm = TII->getImmOrMaterializedImm(*Src0);

  if ((Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
       Opc == AMDGPU::S_NOT_B32) &&
    MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
    TII->mutateAndCleanupImplicit(

  int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);

  MachineOperand *Src1 = &MI->getOperand(Src1Idx);
  std::optional<int64_t> Src1Imm = TII->getImmOrMaterializedImm(*Src1);

  if (!Src0Imm && !Src1Imm)

  if (Src0Imm && Src1Imm) {
    bool IsSGPR = TRI->isSGPRReg(*MRI, MI->getOperand(0).getReg());

    MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
    MI->removeOperand(Src1Idx);

  if (!MI->isCommutable())

  if (Src0Imm && !Src1Imm) {

  int32_t Src1Val = static_cast<int32_t>(*Src1Imm);
  if (Opc == AMDGPU::V_OR_B32_e64 ||
      Opc == AMDGPU::V_OR_B32_e32 ||
      Opc == AMDGPU::S_OR_B32) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(

  if (Opc == AMDGPU::V_AND_B32_e64 || Opc == AMDGPU::V_AND_B32_e32 ||
      Opc == AMDGPU::S_AND_B32) {
      MI->removeOperand(Src0Idx);
      TII->mutateAndCleanupImplicit(
    } else if (Src1Val == -1) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));

  if (Opc == AMDGPU::V_XOR_B32_e64 || Opc == AMDGPU::V_XOR_B32_e32 ||
      Opc == AMDGPU::S_XOR_B32) {
      MI->removeOperand(Src1Idx);
      TII->mutateAndCleanupImplicit(*MI, TII->get(AMDGPU::COPY));
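// tryFoldCndMask() turns a V_CNDMASK whose two sources are the same constant
// into a plain move/copy; tryFoldZeroHighBits() removes an AND with 0xffff
// when the producing instruction already zeroes the high 16 bits of its
// result.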
bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc != AMDGPU::V_CNDMASK_B32_e32 && Opc != AMDGPU::V_CNDMASK_B32_e64 &&
      Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)

  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
  MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

  std::optional<int64_t> Src1Imm = TII->getImmOrMaterializedImm(*Src1);

  std::optional<int64_t> Src0Imm = TII->getImmOrMaterializedImm(*Src0);
  if (!Src0Imm || *Src0Imm != *Src1Imm)

      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  if ((Src1ModIdx != -1 && MI.getOperand(Src1ModIdx).getImm() != 0) ||
      (Src0ModIdx != -1 && MI.getOperand(Src0ModIdx).getImm() != 0))

  int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    MI.removeOperand(Src2Idx);
  MI.removeOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
  if (Src1ModIdx != -1)
    MI.removeOperand(Src1ModIdx);
  if (Src0ModIdx != -1)
    MI.removeOperand(Src0ModIdx);
  TII->mutateAndCleanupImplicit(MI, NewDesc);

bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &MI) const {
  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
      MI.getOpcode() != AMDGPU::V_AND_B32_e32)

  std::optional<int64_t> Src0Imm =
      TII->getImmOrMaterializedImm(MI.getOperand(1));
  if (!Src0Imm || *Src0Imm != 0xffff || !MI.getOperand(2).isReg())

  MachineInstr *SrcDef = MRI->getVRegDef(Src1);

  MRI->replaceRegWith(Dst, Src1);
  if (!MI.getOperand(2).isKill())
    MRI->clearKillFlags(Src1);
  MI.eraseFromParent();
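// foldInstOperand() walks the uses of a foldable definition, collects
// FoldCandidates per use (splitting by subregister via getWithSubReg),
// applies them, and re-runs constant folding on the touched instructions.
// foldCopyToAGPRRegSequence() rewrites a COPY of a REG_SEQUENCE into an AGPR
// class as a REG_SEQUENCE of per-lane copies / V_ACCVGPR_WRITEs.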
bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI,
                                         const FoldableDef &OpToFold) const {
  SmallVector<MachineInstr *, 4> CopiesToReplace;

  MachineOperand &Dst = MI.getOperand(0);
  if (OpToFold.isImm()) {
        if (tryConstantFoldOp(&UseMI)) {

  for (auto *U : UsesToProcess) {
    MachineInstr *UseMI = U->getParent();
    FoldableDef SubOpToFold = OpToFold.getWithSubReg(*TRI, U->getSubReg());

  if (CopiesToReplace.empty() && FoldList.empty())

  MachineFunction *MF = MI.getMF();
  for (MachineInstr *Copy : CopiesToReplace)
    Copy->addImplicitDefUseOperands(*MF);

  SetVector<MachineInstr *> ConstantFoldCandidates;
  for (FoldCandidate &Fold : FoldList) {
    assert(!Fold.isReg() || Fold.Def.OpToFold);
    if (Fold.isReg() && Fold.getReg().isVirtual()) {
      const MachineInstr *DefMI = Fold.Def.DefMI;

      assert(Fold.Def.OpToFold && Fold.isReg());
      MRI->clearKillFlags(Fold.getReg());
                 << static_cast<int>(Fold.UseOpNo) << " of "

        ConstantFoldCandidates.insert(Fold.UseMI);

    } else if (Fold.Commuted) {
      TII->commuteInstruction(*Fold.UseMI, false);

  for (MachineInstr *MI : ConstantFoldCandidates) {
    if (tryConstantFoldOp(MI)) {

bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const {
  const TargetRegisterClass *DefRC =
  if (!TRI->isAGPRClass(DefRC))

  MachineInstr *RegSeq = MRI->getVRegDef(UseReg);

  DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;

  const TargetRegisterClass *UseRC =

  unsigned NumFoldable = 0;

  for (unsigned I = 1; I != NumRegSeqOperands; I += 2) {
    const TargetRegisterClass *DestSuperRC = TRI->getMatchingSuperRegClass(
        DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);

    const TargetRegisterClass *InputRC =

    const TargetRegisterClass *MatchRC =
        TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);

  if (NumFoldable == 0)

  CopyMI->setDesc(TII->get(AMDGPU::REG_SEQUENCE));

  for (auto [Def, DestSubIdx] : NewDefs) {
    if (!Def->isReg()) {
      Register Tmp = MRI->createVirtualRegister(&AMDGPU::AGPR_32RegClass);
      BuildMI(MBB, CopyMI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)

      Def->setIsKill(false);
      Register &VGPRCopy = VGPRCopies[Src];
        const TargetRegisterClass *VGPRUseSubRC =
            TRI->getSubRegisterClass(UseRC, DestSubIdx);

        const TargetRegisterClass *SubRC =
            TRI->getSubRegisterClass(MRI->getRegClass(Src.Reg), Src.SubReg);

        VGPRCopy = MRI->createVirtualRegister(VGPRUseSubRC);

    B.addImm(DestSubIdx);
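// tryFoldFoldableCopy() is the per-copy driver: it tracks the last value
// written to M0 to delete redundant M0 writes, builds a FoldableDef for the
// copy's source, invokes foldInstOperand(), and cleans up copies that become
// dead.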
bool SIFoldOperandsImpl::tryFoldFoldableCopy(
    MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
  if (DstReg == AMDGPU::M0) {
    MachineOperand &NewM0Val = MI.getOperand(1);
    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
      MI.eraseFromParent();

  MachineOperand *OpToFoldPtr;
  if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
    if (TII->hasAnyModifiersSet(MI))
    OpToFoldPtr = &MI.getOperand(2);
    OpToFoldPtr = &MI.getOperand(1);
  MachineOperand &OpToFold = *OpToFoldPtr;

  if (!FoldingImm && !OpToFold.isReg())

      !TRI->isConstantPhysReg(OpToFold.getReg()))

  const TargetRegisterClass *DstRC =
      MRI->getRegClass(MI.getOperand(0).getReg());

  if (MI.getOpcode() == AMDGPU::COPY && OpToFold.isReg() &&
    if (DstRC == &AMDGPU::SReg_32RegClass &&
        DstRC == MRI->getRegClass(OpToFold.getReg())) {

  if (OpToFold.isReg() && MI.isCopy() && !MI.getOperand(1).getSubReg()) {
    if (foldCopyToAGPRRegSequence(&MI))

  FoldableDef Def(OpToFold, DstRC);
  bool Changed = foldInstOperand(MI, Def);

  auto *InstToErase = &MI;
  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    auto &SrcOp = InstToErase->getOperand(1);
    InstToErase->eraseFromParent();
      InstToErase = nullptr;
    InstToErase = MRI->getVRegDef(SrcReg);
    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))

  if (InstToErase && InstToErase->isRegSequence() &&
      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
    InstToErase->eraseFromParent();

  return OpToFold.isReg() &&
         foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.getReg(), MI);
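// isClamp()/tryFoldClamp(): recognize a clamping V_MAX (a value maxed with
// itself with the clamp bit set and no other modifiers) and fold the clamp
// into the defining instruction instead.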
const MachineOperand *
SIFoldOperandsImpl::isClamp(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MAX_F32_e64:
  case AMDGPU::V_MAX_F16_e64:
  case AMDGPU::V_MAX_F16_t16_e64:
  case AMDGPU::V_MAX_F16_fake16_e64:
  case AMDGPU::V_MAX_F64_e64:
  case AMDGPU::V_MAX_NUM_F64_e64:
  case AMDGPU::V_PK_MAX_F16:
  case AMDGPU::V_MAX_BF16_PSEUDO_e64:
  case AMDGPU::V_PK_MAX_NUM_BF16: {
    if (MI.mayRaiseFPException())

    if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())

    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
        Src0->getSubReg() != AMDGPU::NoSubRegister)

    if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

        = TII->getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)->getImm();
        = TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers)->getImm();

    unsigned UnsetMods =
        (Op == AMDGPU::V_PK_MAX_F16 || Op == AMDGPU::V_PK_MAX_NUM_BF16)
    if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)

bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &MI) {
  const MachineOperand *ClampSrc = isClamp(MI);
  if (!ClampSrc || !MRI->hasOneNonDBGUser(ClampSrc->getReg()))

  if (TII->getClampMask(*Def) != TII->getClampMask(MI))

  if (Def->mayRaiseFPException())

  MachineOperand *DefClamp = TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);

  LLVM_DEBUG(dbgs() << "Folding clamp " << *DefClamp << " into " << *Def);

  Register MIDstReg = MI.getOperand(0).getReg();
  if (TRI->isSGPRReg(*MRI, DefReg)) {

  MRI->replaceRegWith(MIDstReg, DefReg);

  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();

  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64: {
    case 0x3fe0000000000000:
    case 0x4000000000000000:
    case 0x4010000000000000:
  case AMDGPU::V_MUL_F32_e64: {
    switch (static_cast<uint32_t>(Val)) {
  case AMDGPU::V_MUL_F16_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64: {
    switch (static_cast<uint16_t>(Val)) {
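// The getOModValue() fragment above maps multiplier constants 0.5, 2.0, and
// 4.0 (0x3fe0000000000000, 0x4000000000000000, 0x4010000000000000 as f64 bit
// patterns) to output-modifier encodings. isOMod()/tryFoldOMod() below
// recognize a multiply or add that implements such a modifier and fold it
// into the omod field of the defining instruction.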
std::pair<const MachineOperand *, int>
SIFoldOperandsImpl::isOMod(const MachineInstr &MI) const {
  unsigned Op = MI.getOpcode();
  case AMDGPU::V_MUL_F64_e64:
  case AMDGPU::V_MUL_F64_pseudo_e64:
  case AMDGPU::V_MUL_F32_e64:
  case AMDGPU::V_MUL_F16_t16_e64:
  case AMDGPU::V_MUL_F16_fake16_e64:
  case AMDGPU::V_MUL_F16_e64: {
    if ((Op == AMDGPU::V_MUL_F32_e64 &&
        ((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
          Op == AMDGPU::V_MUL_F16_e64 || Op == AMDGPU::V_MUL_F16_t16_e64 ||
          Op == AMDGPU::V_MUL_F16_fake16_e64) &&
        MI.mayRaiseFPException())

    const MachineOperand *RegOp = nullptr;
    const MachineOperand *ImmOp = nullptr;
    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
    if (Src0->isImm()) {
    } else if (Src1->isImm()) {

        TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::omod) ||
        TII->hasModifiersSet(MI, AMDGPU::OpName::clamp))

    return std::pair(RegOp, OMod);

  case AMDGPU::V_ADD_F64_e64:
  case AMDGPU::V_ADD_F64_pseudo_e64:
  case AMDGPU::V_ADD_F32_e64:
  case AMDGPU::V_ADD_F16_e64:
  case AMDGPU::V_ADD_F16_t16_e64:
  case AMDGPU::V_ADD_F16_fake16_e64: {
    if ((Op == AMDGPU::V_ADD_F32_e64 &&
        ((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
          Op == AMDGPU::V_ADD_F16_e64 || Op == AMDGPU::V_ADD_F16_t16_e64 ||
          Op == AMDGPU::V_ADD_F16_fake16_e64) &&

    const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
    const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);

        !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp) &&
        !TII->hasModifiersSet(MI, AMDGPU::OpName::omod))

bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
  const MachineOperand *RegOp;
  std::tie(RegOp, OMod) = isOMod(MI);
      RegOp->getSubReg() != AMDGPU::NoSubRegister ||
      !MRI->hasOneNonDBGUser(RegOp->getReg()))

  MachineOperand *DefOMod = TII->getNamedOperand(*Def, AMDGPU::OpName::omod);

  if (Def->mayRaiseFPException())

  if (TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))

  MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());

  MRI->clearKillFlags(Def->getOperand(0).getReg());
  MI.eraseFromParent();

  if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
    Def->eraseFromParent();
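// The remaining routines handle AGPR-related rewrites and the pass driver:
// tryFoldRegSequence() turns a VGPR REG_SEQUENCE fed by AGPR copies into an
// AGPR REG_SEQUENCE, isAGPRCopy()/tryFoldPhiAGPR() move PHIs of AGPR copies
// into AGPR register classes, tryFoldLoad() switches a load's result class to
// AGPR when every user ends up in an AGPR, tryOptimizeAGPRPhis() reroutes
// repeated AGPR PHI inputs through a single VGPR/AGPR copy pair, and run()
// applies all of the above plus the copy-folding entry points over the
// function.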
bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &MI) {
  auto Reg = MI.getOperand(0).getReg();

  if (!ST->hasGFX90AInsts() || !TRI->isVGPR(*MRI, Reg) ||
      !MRI->hasOneNonDBGUse(Reg))

  if (!getRegSeqInit(Defs, Reg))

  for (auto &[Op, SubIdx] : Defs) {
    if (TRI->isAGPR(*MRI, Op->getReg()))
    const MachineInstr *SubDef = MRI->getVRegDef(Op->getReg());

  MachineOperand *Op = &*MRI->use_nodbg_begin(Reg);
  MachineInstr *UseMI = Op->getParent();

  if (Op->getSubReg())

  const TargetRegisterClass *OpRC = TII->getRegClass(InstDesc, OpIdx);
  if (!OpRC || !TRI->isVectorSuperClass(OpRC))

  const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
  auto Dst = MRI->createVirtualRegister(NewDstRC);
                            TII->get(AMDGPU::REG_SEQUENCE), Dst);

  for (auto &[Def, SubIdx] : Defs) {
    Def->setIsKill(false);
      MachineInstr *SubDef = MRI->getVRegDef(Def->getReg());

  RS->eraseFromParent();

  if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
    MI.eraseFromParent();

                       Register &OutReg, unsigned &OutSubReg) {
  if (TRI.isAGPR(MRI, CopySrcReg)) {
    OutReg = CopySrcReg;

  if (!CopySrcDef || !CopySrcDef->isCopy())

      OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
      !TRI.isAGPR(MRI, OtherCopySrcReg))

  OutReg = OtherCopySrcReg;

bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &PHI) {
  if (!TRI->isVGPR(*MRI, PhiOut))

  const TargetRegisterClass *ARC = nullptr;
  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);
    if (!Copy || !Copy->isCopy())

    unsigned AGPRRegMask = AMDGPU::NoSubRegister;

    const TargetRegisterClass *CopyInRC = MRI->getRegClass(AGPRSrc);
    if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))

  bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);

  for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
    MachineOperand &MO = PHI.getOperand(K);

    MachineBasicBlock *InsertMBB = nullptr;

    unsigned CopyOpc = AMDGPU::COPY;
    if (MachineInstr *Def = MRI->getVRegDef(Reg)) {
      if (Def->isCopy()) {
        unsigned AGPRSubReg = AMDGPU::NoSubRegister;

      MachineOperand &CopyIn = Def->getOperand(1);
      if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
        CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;

      InsertMBB = Def->getParent();

    Register NewReg = MRI->createVirtualRegister(ARC);
    MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
                               TII->get(CopyOpc), NewReg)

  Register NewReg = MRI->createVirtualRegister(ARC);
  PHI.getOperand(0).setReg(NewReg);

          TII->get(AMDGPU::COPY), PhiOut)

bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &MI) {
  if (!ST->hasGFX90AInsts() || MI.getNumExplicitDefs() != 1)

  MachineOperand &Def = MI.getOperand(0);

  while (!Users.empty()) {
    const MachineInstr *I = Users.pop_back_val();
    if (!I->isCopy() && !I->isRegSequence())

    Register DstReg = I->getOperand(0).getReg();
    if (TRI->isAGPR(*MRI, DstReg))
    for (const MachineInstr &U : MRI->use_nodbg_instructions(DstReg))
      Users.push_back(&U);

  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
  MRI->setRegClass(DefReg, TRI->getEquivalentAGPRClass(RC));
  if (!TII->isOperandLegal(MI, 0, &Def)) {
    MRI->setRegClass(DefReg, RC);

  while (!MoveRegs.empty()) {
    MRI->setRegClass(Reg, TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg)));

bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
  if (ST->hasGFX90AInsts())

  DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>

  for (auto &MI : MBB) {
    if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))

    for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
      MachineOperand &PhiMO = MI.getOperand(K);

  for (const auto &[Entry, MOs] : RegToMO) {
    if (MOs.size() == 1)

    MachineInstr *Def = MRI->getVRegDef(Reg);
    MachineBasicBlock *DefMBB = Def->getParent();

        MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
    MachineInstr *VGPRCopy =
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)

    Register TempAGPR = MRI->createVirtualRegister(ARC);
                TII->get(AMDGPU::COPY), TempAGPR)

    for (MachineOperand *MO : MOs) {

bool SIFoldOperandsImpl::run(MachineFunction &MF) {
  MFI = MF.getInfo<SIMachineFunctionInfo>();

      MachineOperand *CurrentKnownM0Val = nullptr;

        if (tryFoldZeroHighBits(MI)) {

        if (MI.isRegSequence() && tryFoldRegSequence(MI)) {

        if (MI.isPHI() && tryFoldPhiAGPR(MI)) {

        if (MI.mayLoad() && tryFoldLoad(MI)) {

        if (TII->isFoldableCopy(MI)) {
          Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);

        if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
          CurrentKnownM0Val = nullptr;

  bool Changed = SIFoldOperandsImpl().run(MF);