23#define DEBUG_TYPE "si-fold-operands"
44 unsigned DefSubReg = AMDGPU::NoSubRegister;
/// Deleted default constructor: a FoldableDef cannot be default-constructed.
49 FoldableDef() =
delete;
51 unsigned DefSubReg = AMDGPU::NoSubRegister)
52 : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.
getType()) {
55 ImmToFold = FoldOp.
getImm();
56 }
else if (FoldOp.
isFI()) {
57 FrameIndexToFold = FoldOp.
getIndex();
67 unsigned DefSubReg = AMDGPU::NoSubRegister)
68 : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
73 FoldableDef Copy(*
this);
74 Copy.DefSubReg =
TRI.composeSubRegIndices(DefSubReg, SubReg);
82 return OpToFold->getReg();
85 unsigned getSubReg()
const {
87 return OpToFold->getSubReg();
98 return FrameIndexToFold;
106 std::optional<int64_t> getEffectiveImmVal()
const {
114 unsigned OpIdx)
const {
117 std::optional<int64_t> ImmToFold = getEffectiveImmVal();
127 if (DefSubReg != AMDGPU::NoSubRegister)
135 if (DefSubReg != AMDGPU::NoSubRegister)
137 return TII.isOperandLegal(
MI,
OpIdx, OpToFold);
144struct FoldCandidate {
152 bool Commuted =
false,
int ShrinkOp = -1)
153 :
UseMI(
MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
154 Commuted(Commuted) {}
/// Forwards to Def.isFI(); reports whether the definition being folded is a
/// frame index.
156 bool isFI()
const {
return Def.isFI(); }
160 return Def.FrameIndexToFold;
/// Forwards to Def.isImm(); reports whether the definition being folded is an
/// immediate.
163 bool isImm()
const {
return Def.isImm(); }
/// Forwards to Def.isReg(); reports whether the definition being folded is a
/// register.
165 bool isReg()
const {
return Def.isReg(); }
/// Forwards to Def.isGlobal(); reports whether the definition being folded is
/// a global.
169 bool isGlobal()
const {
return Def.isGlobal(); }
/// Returns true when a shrink opcode has been recorded for this candidate,
/// i.e. ShrinkOpcode differs from the -1 "none" value that the constructor
/// uses as its default.
171 bool needsShrink()
const {
return ShrinkOpcode != -1; }
174class SIFoldOperandsImpl {
184 const FoldableDef &OpToFold)
const;
187 unsigned convertToVALUOp(
unsigned Opc,
bool UseVOP3 =
false)
const {
189 case AMDGPU::S_ADD_I32: {
190 if (ST->hasAddNoCarryInsts())
191 return UseVOP3 ? AMDGPU::V_ADD_U32_e64 : AMDGPU::V_ADD_U32_e32;
192 return UseVOP3 ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e32;
194 case AMDGPU::S_OR_B32:
195 return UseVOP3 ? AMDGPU::V_OR_B32_e64 : AMDGPU::V_OR_B32_e32;
196 case AMDGPU::S_AND_B32:
197 return UseVOP3 ? AMDGPU::V_AND_B32_e64 : AMDGPU::V_AND_B32_e32;
198 case AMDGPU::S_MUL_I32:
199 return AMDGPU::V_MUL_LO_U32_e64;
201 return AMDGPU::INSTRUCTION_LIST_END;
205 bool foldCopyToVGPROfScalarAddOfFrameIndex(
Register DstReg,
Register SrcReg,
211 int64_t ImmVal)
const;
215 int64_t ImmVal)
const;
219 const FoldableDef &OpToFold)
const;
228 getRegSeqInit(
SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
231 std::pair<int64_t, const TargetRegisterClass *>
248 bool foldInstOperand(
MachineInstr &
MI,
const FoldableDef &OpToFold)
const;
250 bool foldCopyToAGPRRegSequence(
MachineInstr *CopyMI)
const;
257 std::pair<const MachineOperand *, int> isOMod(
const MachineInstr &
MI)
const;
/// Compiler-generated default constructor, explicitly defaulted.
266 SIFoldOperandsImpl() =
default;
280 return SIFoldOperandsImpl().run(MF);
/// Returns the fixed, human-readable name of this pass, "SI Fold Operands".
/// Overrides a virtual declared in a base class (see `override`).
283 StringRef getPassName()
const override {
return "SI Fold Operands"; }
// Out-of-class definition of the static member SIFoldOperandsLegacy::ID,
// initialized to 0.
// NOTE(review): presumably the legacy pass-manager identifier, where only the
// address matters and not the value - confirm against the class declaration.
300char SIFoldOperandsLegacy::
ID = 0;
309 TRI.getSubRegisterClass(RC, MO.getSubReg()))
317 case AMDGPU::V_MAC_F32_e64:
318 return AMDGPU::V_MAD_F32_e64;
319 case AMDGPU::V_MAC_F16_e64:
320 return AMDGPU::V_MAD_F16_e64;
321 case AMDGPU::V_FMAC_F32_e64:
322 return AMDGPU::V_FMA_F32_e64;
323 case AMDGPU::V_FMAC_F16_e64:
324 return AMDGPU::V_FMA_F16_gfx9_e64;
325 case AMDGPU::V_FMAC_F16_t16_e64:
326 return AMDGPU::V_FMA_F16_gfx9_t16_e64;
327 case AMDGPU::V_FMAC_F16_fake16_e64:
328 return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
329 case AMDGPU::V_FMAC_LEGACY_F32_e64:
330 return AMDGPU::V_FMA_LEGACY_F32_e64;
331 case AMDGPU::V_FMAC_F64_e64:
332 return AMDGPU::V_FMA_F64_e64;
334 return AMDGPU::INSTRUCTION_LIST_END;
340 const FoldableDef &OpToFold)
const {
341 if (!OpToFold.isFI())
344 const unsigned Opc =
UseMI.getOpcode();
346 case AMDGPU::S_ADD_I32:
347 case AMDGPU::S_ADD_U32:
348 case AMDGPU::V_ADD_U32_e32:
349 case AMDGPU::V_ADD_CO_U32_e32:
353 return UseMI.getOperand(OpNo == 1 ? 2 : 1).isImm() &&
355 case AMDGPU::V_ADD_U32_e64:
356 case AMDGPU::V_ADD_CO_U32_e64:
357 return UseMI.getOperand(OpNo == 2 ? 3 : 2).isImm() &&
364 return OpNo == AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
368 int SIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::saddr);
372 int VIdx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::vaddr);
373 return OpNo == VIdx && SIdx == -1;
379bool SIFoldOperandsImpl::foldCopyToVGPROfScalarAddOfFrameIndex(
381 if (
TRI->isVGPR(*MRI, DstReg) &&
TRI->isSGPRReg(*MRI, SrcReg) &&
384 if (!Def ||
Def->getNumOperands() != 4)
387 MachineOperand *Src0 = &
Def->getOperand(1);
388 MachineOperand *Src1 = &
Def->getOperand(2);
399 const bool UseVOP3 = !Src0->
isImm() ||
TII->isInlineConstant(*Src0);
400 unsigned NewOp = convertToVALUOp(
Def->getOpcode(), UseVOP3);
401 if (NewOp == AMDGPU::INSTRUCTION_LIST_END ||
402 !
Def->getOperand(3).isDead())
405 MachineBasicBlock *
MBB =
Def->getParent();
407 if (NewOp != AMDGPU::V_ADD_CO_U32_e32) {
408 MachineInstrBuilder
Add =
411 if (
Add->getDesc().getNumDefs() == 2) {
413 Add.addDef(CarryOutReg, RegState::Dead);
417 Add.add(*Src0).add(*Src1).setMIFlags(
Def->getFlags());
421 Def->eraseFromParent();
422 MI.eraseFromParent();
426 assert(NewOp == AMDGPU::V_ADD_CO_U32_e32);
437 Def->eraseFromParent();
438 MI.eraseFromParent();
447 return new SIFoldOperandsLegacy();
450bool SIFoldOperandsImpl::canUseImmWithOpSel(
const MachineInstr *
MI,
452 int64_t ImmVal)
const {
453 const uint64_t TSFlags =
MI->getDesc().TSFlags;
461 int OpNo =
MI->getOperandNo(&Old);
463 unsigned Opcode =
MI->getOpcode();
464 uint8_t OpType =
TII->get(Opcode).operands()[OpNo].OperandType;
486bool SIFoldOperandsImpl::tryFoldImmWithOpSel(MachineInstr *
MI,
unsigned UseOpNo,
487 int64_t ImmVal)
const {
488 MachineOperand &Old =
MI->getOperand(UseOpNo);
489 unsigned Opcode =
MI->getOpcode();
490 int OpNo =
MI->getOperandNo(&Old);
491 uint8_t OpType =
TII->get(Opcode).operands()[OpNo].OperandType;
503 AMDGPU::OpName ModName = AMDGPU::OpName::NUM_OPERAND_NAMES;
504 unsigned SrcIdx = ~0;
505 if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0)) {
506 ModName = AMDGPU::OpName::src0_modifiers;
508 }
else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1)) {
509 ModName = AMDGPU::OpName::src1_modifiers;
511 }
else if (OpNo == AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2)) {
512 ModName = AMDGPU::OpName::src2_modifiers;
515 assert(ModName != AMDGPU::OpName::NUM_OPERAND_NAMES);
516 int ModIdx = AMDGPU::getNamedOperandIdx(Opcode, ModName);
517 MachineOperand &
Mod =
MI->getOperand(ModIdx);
518 unsigned ModVal =
Mod.getImm();
524 uint32_t
Imm = (
static_cast<uint32_t
>(ImmHi) << 16) | ImmLo;
529 auto tryFoldToInline = [&](uint32_t
Imm) ->
bool {
538 uint16_t
Lo =
static_cast<uint16_t
>(
Imm);
539 uint16_t
Hi =
static_cast<uint16_t
>(
Imm >> 16);
542 Mod.setImm(NewModVal);
547 if (
static_cast<int16_t
>(
Lo) < 0) {
548 int32_t SExt =
static_cast<int16_t
>(
Lo);
550 Mod.setImm(NewModVal);
565 uint32_t Swapped = (
static_cast<uint32_t
>(
Lo) << 16) |
Hi;
576 if (tryFoldToInline(Imm))
585 bool IsUAdd = Opcode == AMDGPU::V_PK_ADD_U16;
586 bool IsUSub = Opcode == AMDGPU::V_PK_SUB_U16;
587 if (SrcIdx == 1 && (IsUAdd || IsUSub)) {
589 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::clamp);
590 bool Clamp =
MI->getOperand(ClampIdx).getImm() != 0;
593 uint16_t NegLo = -
static_cast<uint16_t
>(
Imm);
594 uint16_t NegHi = -
static_cast<uint16_t
>(
Imm >> 16);
595 uint32_t NegImm = (
static_cast<uint32_t
>(NegHi) << 16) | NegLo;
597 if (tryFoldToInline(NegImm)) {
599 IsUAdd ? AMDGPU::V_PK_SUB_U16 : AMDGPU::V_PK_ADD_U16;
600 MI->setDesc(
TII->get(NegOpcode));
609bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold)
const {
610 MachineInstr *
MI = Fold.UseMI;
611 MachineOperand &Old =
MI->getOperand(Fold.UseOpNo);
614 std::optional<int64_t> ImmVal;
616 ImmVal = Fold.Def.getEffectiveImmVal();
618 if (ImmVal && canUseImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal)) {
619 if (tryFoldImmWithOpSel(Fold.UseMI, Fold.UseOpNo, *ImmVal))
625 int OpNo =
MI->getOperandNo(&Old);
626 if (!
TII->isOperandLegal(*
MI, OpNo, &New))
632 if ((Fold.isImm() || Fold.isFI() || Fold.isGlobal()) && Fold.needsShrink()) {
633 MachineBasicBlock *
MBB =
MI->getParent();
640 int Op32 = Fold.ShrinkOpcode;
641 MachineOperand &Dst0 =
MI->getOperand(0);
642 MachineOperand &Dst1 =
MI->getOperand(1);
650 MachineInstr *Inst32 =
TII->buildShrunkInst(*
MI, Op32);
652 if (HaveNonDbgCarryUse) {
655 .
addReg(AMDGPU::VCC, RegState::Kill);
665 for (
unsigned I =
MI->getNumOperands() - 1;
I > 0; --
I)
666 MI->removeOperand(
I);
667 MI->setDesc(
TII->get(AMDGPU::IMPLICIT_DEF));
670 TII->commuteInstruction(*Inst32,
false);
674 assert(!Fold.needsShrink() &&
"not handled");
679 if (NewMFMAOpc == -1)
681 MI->setDesc(
TII->get(NewMFMAOpc));
682 MI->untieRegOperand(0);
683 const MCInstrDesc &MCID =
MI->getDesc();
684 for (
unsigned I = 0;
I <
MI->getNumDefs(); ++
I)
686 MI->getOperand(
I).setIsEarlyClobber(
true);
691 int OpNo =
MI->getOperandNo(&Old);
692 if (!
TII->isOperandLegal(*
MI, OpNo, &New))
699 if (Fold.isGlobal()) {
700 Old.
ChangeToGA(Fold.Def.OpToFold->getGlobal(),
701 Fold.Def.OpToFold->getOffset(),
702 Fold.Def.OpToFold->getTargetFlags());
711 MachineOperand *
New = Fold.Def.OpToFold;
714 if (
const TargetRegisterClass *OpRC =
715 TII->getRegClass(
MI->getDesc(), Fold.UseOpNo)) {
716 const TargetRegisterClass *NewRC =
717 TRI->getRegClassForReg(*MRI,
New->getReg());
719 const TargetRegisterClass *ConstrainRC = OpRC;
720 if (
New->getSubReg()) {
722 TRI->getMatchingSuperRegClass(NewRC, OpRC,
New->getSubReg());
728 if (
New->getReg().isVirtual() &&
731 <<
TRI->getRegClassName(ConstrainRC) <<
'\n');
738 if (Old.
getSubReg() == AMDGPU::lo16 &&
TRI->isSGPRReg(*MRI,
New->getReg()))
740 if (
New->getReg().isPhysical()) {
748 if (
MI->isBundledWithPred()) {
750 for (MachineOperand &MO : Header.operands()) {
751 if (MO.getReg() == OldReg) {
752 MO.setReg(
New->getReg());
753 MO.setSubReg(
New->getSubReg());
762 FoldCandidate &&Entry) {
764 for (FoldCandidate &Fold : FoldList)
765 if (Fold.UseMI == Entry.UseMI && Fold.UseOpNo == Entry.UseOpNo)
767 LLVM_DEBUG(
dbgs() <<
"Append " << (Entry.Commuted ?
"commuted" :
"normal")
768 <<
" operand " << Entry.UseOpNo <<
"\n " << *Entry.UseMI);
774 const FoldableDef &FoldOp,
775 bool Commuted =
false,
int ShrinkOp = -1) {
777 FoldCandidate(
MI, OpNo, FoldOp, Commuted, ShrinkOp));
785 if (!ST->hasPKF32InstsReplicatingLower32BitsOfScalarInput())
795 const FoldableDef &OpToFold) {
796 assert(OpToFold.isImm() &&
"Expected immediate operand");
797 uint64_t ImmVal = OpToFold.getEffectiveImmVal().value();
803bool SIFoldOperandsImpl::tryAddToFoldList(
804 SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *
MI,
unsigned OpNo,
805 const FoldableDef &OpToFold)
const {
806 const unsigned Opc =
MI->getOpcode();
808 auto tryToFoldAsFMAAKorMK = [&]() {
809 if (!OpToFold.isImm())
812 const bool TryAK = OpNo == 3;
813 const unsigned NewOpc = TryAK ? AMDGPU::S_FMAAK_F32 : AMDGPU::S_FMAMK_F32;
814 MI->setDesc(
TII->get(NewOpc));
817 bool FoldAsFMAAKorMK =
818 tryAddToFoldList(FoldList,
MI, TryAK ? 3 : 2, OpToFold);
819 if (FoldAsFMAAKorMK) {
821 MI->untieRegOperand(3);
824 MachineOperand &Op1 =
MI->getOperand(1);
825 MachineOperand &Op2 =
MI->getOperand(2);
842 bool IsLegal = OpToFold.isOperandLegal(*
TII, *
MI, OpNo);
843 if (!IsLegal && OpToFold.isImm()) {
844 if (std::optional<int64_t> ImmVal = OpToFold.getEffectiveImmVal())
845 IsLegal = canUseImmWithOpSel(
MI, OpNo, *ImmVal);
851 if (NewOpc != AMDGPU::INSTRUCTION_LIST_END) {
854 MI->setDesc(
TII->get(NewOpc));
859 bool FoldAsMAD = tryAddToFoldList(FoldList,
MI, OpNo, OpToFold);
861 MI->untieRegOperand(OpNo);
865 MI->removeOperand(
MI->getNumExplicitOperands() - 1);
871 if (
Opc == AMDGPU::S_FMAC_F32 && OpNo == 3) {
872 if (tryToFoldAsFMAAKorMK())
877 if (OpToFold.isImm()) {
879 if (
Opc == AMDGPU::S_SETREG_B32)
880 ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
881 else if (
Opc == AMDGPU::S_SETREG_B32_mode)
882 ImmOpc = AMDGPU::S_SETREG_IMM32_B32_mode;
884 MI->setDesc(
TII->get(ImmOpc));
893 bool CanCommute =
TII->findCommutedOpIndices(*
MI, OpNo, CommuteOpNo);
897 MachineOperand &
Op =
MI->getOperand(OpNo);
898 MachineOperand &CommutedOp =
MI->getOperand(CommuteOpNo);
904 if (!
Op.isReg() || !CommutedOp.
isReg())
909 if (
Op.isReg() && CommutedOp.
isReg() &&
910 (
Op.getReg() == CommutedOp.
getReg() &&
914 if (!
TII->commuteInstruction(*
MI,
false, OpNo, CommuteOpNo))
918 if (!OpToFold.isOperandLegal(*
TII, *
MI, CommuteOpNo)) {
919 if ((
Opc != AMDGPU::V_ADD_CO_U32_e64 &&
Opc != AMDGPU::V_SUB_CO_U32_e64 &&
920 Opc != AMDGPU::V_SUBREV_CO_U32_e64) ||
921 (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
922 TII->commuteInstruction(*
MI,
false, OpNo, CommuteOpNo);
928 MachineOperand &OtherOp =
MI->getOperand(OpNo);
929 if (!OtherOp.
isReg() ||
936 unsigned MaybeCommutedOpc =
MI->getOpcode();
950 if (
Opc == AMDGPU::S_FMAC_F32 &&
951 (OpNo != 1 || !
MI->getOperand(1).isIdenticalTo(
MI->getOperand(2)))) {
952 if (tryToFoldAsFMAAKorMK())
958 if (OpToFold.isImm() &&
967bool SIFoldOperandsImpl::isUseSafeToFold(
const MachineInstr &
MI,
968 const MachineOperand &UseMO)
const {
970 return !
TII->isSDWA(
MI);
978 SubDef &&
TII.isFoldableCopy(*SubDef);
980 unsigned SrcIdx =
TII.getFoldableCopySrcIdx(*SubDef);
989 if (
SrcOp.getSubReg())
996const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
997 MachineInstr &RegSeq,
998 SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs)
const {
1002 const TargetRegisterClass *RC =
nullptr;
1009 const TargetRegisterClass *OpRC =
getRegOpRC(*MRI, *
TRI, SrcOp);
1012 else if (!
TRI->getCommonSubClass(RC, OpRC))
1017 Defs.emplace_back(&SrcOp, SubRegIdx);
1022 if (DefSrc && (DefSrc->
isReg() || DefSrc->
isImm())) {
1023 Defs.emplace_back(DefSrc, SubRegIdx);
1027 Defs.emplace_back(&SrcOp, SubRegIdx);
1036const TargetRegisterClass *SIFoldOperandsImpl::getRegSeqInit(
1037 SmallVectorImpl<std::pair<MachineOperand *, unsigned>> &Defs,
1040 if (!Def || !
Def->isRegSequence())
1043 return getRegSeqInit(*Def, Defs);
1046std::pair<int64_t, const TargetRegisterClass *>
1047SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq)
const {
1049 const TargetRegisterClass *SrcRC = getRegSeqInit(RegSeq, Defs);
1053 bool TryToMatchSplat64 =
false;
1056 for (
unsigned I = 0,
E = Defs.
size();
I !=
E; ++
I) {
1057 const MachineOperand *
Op = Defs[
I].first;
1061 int64_t SubImm =
Op->getImm();
1067 if (Imm != SubImm) {
1068 if (
I == 1 && (
E & 1) == 0) {
1071 TryToMatchSplat64 =
true;
1079 if (!TryToMatchSplat64)
1080 return {Defs[0].first->getImm(), SrcRC};
1085 for (
unsigned I = 0,
E = Defs.
size();
I !=
E;
I += 2) {
1086 const MachineOperand *Op0 = Defs[
I].first;
1087 const MachineOperand *Op1 = Defs[
I + 1].first;
1092 unsigned SubReg0 = Defs[
I].second;
1093 unsigned SubReg1 = Defs[
I + 1].second;
1097 if (
TRI->getChannelFromSubReg(SubReg0) + 1 !=
1098 TRI->getChannelFromSubReg(SubReg1))
1101 if (
TRI->getSubRegIdxSize(SubReg0) != 32)
1106 SplatVal64 = MergedVal;
1107 else if (SplatVal64 != MergedVal)
1111 const TargetRegisterClass *RC64 =
TRI->getSubRegisterClass(
1114 return {SplatVal64, RC64};
1117bool SIFoldOperandsImpl::tryFoldRegSeqSplat(
1118 MachineInstr *
UseMI,
unsigned UseOpIdx, int64_t SplatVal,
1119 const TargetRegisterClass *SplatRC)
const {
1121 if (UseOpIdx >=
Desc.getNumOperands())
1128 int16_t RCID =
TII->getOpRegClassID(
Desc.operands()[UseOpIdx]);
1132 const TargetRegisterClass *OpRC =
TRI->getRegClass(RCID);
1137 if (SplatVal != 0 && SplatVal != -1) {
1141 uint8_t OpTy =
Desc.operands()[UseOpIdx].OperandType;
1147 OpRC =
TRI->getSubRegisterClass(OpRC, AMDGPU::sub0);
1152 OpRC =
TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1);
1158 if (!
TRI->getCommonSubClass(OpRC, SplatRC))
1163 if (!
TII->isOperandLegal(*
UseMI, UseOpIdx, &TmpOp))
1169bool SIFoldOperandsImpl::tryToFoldACImm(
1170 const FoldableDef &OpToFold, MachineInstr *
UseMI,
unsigned UseOpIdx,
1171 SmallVectorImpl<FoldCandidate> &FoldList)
const {
1173 if (UseOpIdx >=
Desc.getNumOperands())
1180 if (OpToFold.isImm() && OpToFold.isOperandLegal(*
TII, *
UseMI, UseOpIdx)) {
1191void SIFoldOperandsImpl::foldOperand(
1192 FoldableDef OpToFold, MachineInstr *
UseMI,
int UseOpIdx,
1193 SmallVectorImpl<FoldCandidate> &FoldList,
1194 SmallVectorImpl<MachineInstr *> &CopiesToReplace)
const {
1197 if (!isUseSafeToFold(*
UseMI, *UseOp))
1201 if (UseOp->
isReg() && OpToFold.isReg()) {
1205 if (UseOp->
getSubReg() != AMDGPU::NoSubRegister &&
1207 !
TRI->isSGPRReg(*MRI, OpToFold.getReg())))
1219 const TargetRegisterClass *SplatRC;
1220 std::tie(SplatVal, SplatRC) = isRegSeqSplat(*
UseMI);
1225 for (
unsigned I = 0;
I != UsesToProcess.size(); ++
I) {
1226 MachineOperand *RSUse = UsesToProcess[
I];
1227 MachineInstr *RSUseMI = RSUse->
getParent();
1237 if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
1238 FoldableDef SplatDef(SplatVal, SplatRC);
1245 if (RSUse->
getSubReg() != RegSeqDstSubReg)
1250 foldOperand(OpToFold, RSUseMI, RSUseMI->
getOperandNo(RSUse), FoldList,
1257 if (tryToFoldACImm(OpToFold,
UseMI, UseOpIdx, FoldList))
1260 if (frameIndexMayFold(*
UseMI, UseOpIdx, OpToFold)) {
1265 if (
TII->getNamedOperand(*
UseMI, AMDGPU::OpName::srsrc)->getReg() !=
1271 MachineOperand &SOff =
1272 *
TII->getNamedOperand(*
UseMI, AMDGPU::OpName::soffset);
1283 TII->getNamedOperand(*
UseMI, AMDGPU::OpName::cpol)->getImm();
1298 bool FoldingImmLike =
1299 OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
1307 const TargetRegisterClass *SrcRC = MRI->
getRegClass(SrcReg);
1315 const TargetRegisterClass *DestRC =
TRI->getRegClassForReg(*MRI, DestReg);
1318 for (
unsigned MovOp :
1319 {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
1320 AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_MOV_B16_t16_e64,
1321 AMDGPU::V_ACCVGPR_WRITE_B32_e64, AMDGPU::AV_MOV_B32_IMM_PSEUDO,
1322 AMDGPU::AV_MOV_B64_IMM_PSEUDO}) {
1323 const MCInstrDesc &MovDesc =
TII->get(MovOp);
1324 const TargetRegisterClass *MovDstRC =
1333 const int SrcIdx = MovOp == AMDGPU::V_MOV_B16_t16_e64 ? 2 : 1;
1335 int16_t RegClassID =
TII->getOpRegClassID(MovDesc.
operands()[SrcIdx]);
1336 if (RegClassID != -1) {
1337 const TargetRegisterClass *MovSrcRC =
TRI->getRegClass(RegClassID);
1340 MovSrcRC =
TRI->getMatchingSuperRegClass(SrcRC, MovSrcRC, UseSubReg);
1344 if (MovOp == AMDGPU::AV_MOV_B32_IMM_PSEUDO &&
1345 (!OpToFold.isImm() ||
1346 !
TII->isImmOperandLegal(MovDesc, SrcIdx,
1347 *OpToFold.getEffectiveImmVal())))
1360 if (!OpToFold.isImm() ||
1361 !
TII->isImmOperandLegal(MovDesc, 1, *OpToFold.getEffectiveImmVal()))
1367 while (ImpOpI != ImpOpE) {
1374 if (MovOp == AMDGPU::V_MOV_B16_t16_e64) {
1376 MachineOperand NewSrcOp(SrcOp);
1398 LLVM_DEBUG(
dbgs() <<
"Folding " << OpToFold.OpToFold <<
"\n into "
1403 unsigned SubRegIdx = OpToFold.getSubReg();
1417 static_assert(AMDGPU::sub1_hi16 == 12,
"Subregister layout has changed");
1422 if (SubRegIdx > AMDGPU::sub1) {
1423 LaneBitmask
M =
TRI->getSubRegIndexLaneMask(SubRegIdx);
1424 M |=
M.getLane(
M.getHighestLane() - 1);
1425 SmallVector<unsigned, 4> Indexes;
1426 TRI->getCoveringSubRegIndexes(
TRI->getRegClassForReg(*MRI,
UseReg), M,
1428 assert(Indexes.
size() == 1 &&
"Expected one 32-bit subreg to cover");
1429 SubRegIdx = Indexes[0];
1431 }
else if (
TII->getOpSize(*
UseMI, 1) == 4)
1434 SubRegIdx = AMDGPU::sub0;
1439 OpToFold.OpToFold->setIsKill(
false);
1443 if (foldCopyToAGPRRegSequence(
UseMI))
1448 if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
1449 (UseOpc == AMDGPU::V_READLANE_B32 &&
1451 AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
1456 if (FoldingImmLike) {
1459 *OpToFold.DefMI, *
UseMI))
1465 if (OpToFold.isImm()) {
1467 *OpToFold.getEffectiveImmVal());
1468 }
else if (OpToFold.isFI())
1471 assert(OpToFold.isGlobal());
1473 OpToFold.OpToFold->getOffset(),
1474 OpToFold.OpToFold->getTargetFlags());
1480 if (OpToFold.isReg() &&
TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
1483 *OpToFold.DefMI, *
UseMI))
1505 UseDesc.
operands()[UseOpIdx].RegClass == -1)
1513 tryAddToFoldList(FoldList,
UseMI, UseOpIdx, OpToFold);
1519 case AMDGPU::S_ADD_I32:
1520 case AMDGPU::S_ADD_U32:
1523 case AMDGPU::S_SUB_I32:
1524 case AMDGPU::S_SUB_U32:
1527 case AMDGPU::V_AND_B32_e64:
1528 case AMDGPU::V_AND_B32_e32:
1529 case AMDGPU::S_AND_B32:
1532 case AMDGPU::V_OR_B32_e64:
1533 case AMDGPU::V_OR_B32_e32:
1534 case AMDGPU::S_OR_B32:
1537 case AMDGPU::V_XOR_B32_e64:
1538 case AMDGPU::V_XOR_B32_e32:
1539 case AMDGPU::S_XOR_B32:
1542 case AMDGPU::S_XNOR_B32:
1545 case AMDGPU::S_NAND_B32:
1548 case AMDGPU::S_NOR_B32:
1551 case AMDGPU::S_ANDN2_B32:
1554 case AMDGPU::S_ORN2_B32:
1557 case AMDGPU::V_LSHL_B32_e64:
1558 case AMDGPU::V_LSHL_B32_e32:
1559 case AMDGPU::S_LSHL_B32:
1561 Result =
LHS << (
RHS & 31);
1563 case AMDGPU::V_LSHLREV_B32_e64:
1564 case AMDGPU::V_LSHLREV_B32_e32:
1565 Result =
RHS << (
LHS & 31);
1567 case AMDGPU::V_LSHR_B32_e64:
1568 case AMDGPU::V_LSHR_B32_e32:
1569 case AMDGPU::S_LSHR_B32:
1570 Result =
LHS >> (
RHS & 31);
1572 case AMDGPU::V_LSHRREV_B32_e64:
1573 case AMDGPU::V_LSHRREV_B32_e32:
1574 Result =
RHS >> (
LHS & 31);
1576 case AMDGPU::V_ASHR_I32_e64:
1577 case AMDGPU::V_ASHR_I32_e32:
1578 case AMDGPU::S_ASHR_I32:
1579 Result =
static_cast<int32_t
>(
LHS) >> (
RHS & 31);
1581 case AMDGPU::V_ASHRREV_I32_e64:
1582 case AMDGPU::V_ASHRREV_I32_e32:
1583 Result =
static_cast<int32_t
>(
RHS) >> (
LHS & 31);
1591 return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1597bool SIFoldOperandsImpl::tryConstantFoldOp(MachineInstr *
MI)
const {
1598 if (!
MI->allImplicitDefsAreDead())
1601 unsigned Opc =
MI->getOpcode();
1603 int Src0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0);
1607 MachineOperand *Src0 = &
MI->getOperand(Src0Idx);
1608 std::optional<int64_t> Src0Imm =
TII->getImmOrMaterializedImm(*Src0);
1610 if ((
Opc == AMDGPU::V_NOT_B32_e64 ||
Opc == AMDGPU::V_NOT_B32_e32 ||
1611 Opc == AMDGPU::S_NOT_B32) &&
1613 MI->getOperand(1).ChangeToImmediate(~*Src0Imm);
1614 TII->mutateAndCleanupImplicit(
1619 int Src1Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1);
1623 MachineOperand *Src1 = &
MI->getOperand(Src1Idx);
1624 std::optional<int64_t> Src1Imm =
TII->getImmOrMaterializedImm(*Src1);
1626 if (!Src0Imm && !Src1Imm)
1632 if (Src0Imm && Src1Imm) {
1637 bool IsSGPR =
TRI->isSGPRReg(*MRI,
MI->getOperand(0).getReg());
1641 MI->getOperand(Src0Idx).ChangeToImmediate(NewImm);
1642 MI->removeOperand(Src1Idx);
1649 if (
Opc == AMDGPU::S_SUB_I32 ||
Opc == AMDGPU::S_SUB_U32) {
1650 if (Src1Imm &&
static_cast<int32_t
>(*Src1Imm) == 0) {
1652 MI->removeOperand(Src1Idx);
1653 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1659 if (!
MI->isCommutable())
1662 if (Src0Imm && !Src1Imm) {
1668 int32_t Src1Val =
static_cast<int32_t
>(*Src1Imm);
1669 if (
Opc == AMDGPU::S_ADD_I32 ||
Opc == AMDGPU::S_ADD_U32) {
1672 MI->removeOperand(Src1Idx);
1673 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1679 if (
Opc == AMDGPU::V_OR_B32_e64 ||
1680 Opc == AMDGPU::V_OR_B32_e32 ||
1681 Opc == AMDGPU::S_OR_B32) {
1684 MI->removeOperand(Src1Idx);
1685 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1686 }
else if (Src1Val == -1) {
1688 MI->removeOperand(Src0Idx);
1689 TII->mutateAndCleanupImplicit(
1697 if (
Opc == AMDGPU::V_AND_B32_e64 ||
Opc == AMDGPU::V_AND_B32_e32 ||
1698 Opc == AMDGPU::S_AND_B32) {
1701 MI->removeOperand(Src0Idx);
1702 TII->mutateAndCleanupImplicit(
1704 }
else if (Src1Val == -1) {
1706 MI->removeOperand(Src1Idx);
1707 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1714 if (
Opc == AMDGPU::V_XOR_B32_e64 ||
Opc == AMDGPU::V_XOR_B32_e32 ||
1715 Opc == AMDGPU::S_XOR_B32) {
1718 MI->removeOperand(Src1Idx);
1719 TII->mutateAndCleanupImplicit(*
MI,
TII->get(AMDGPU::COPY));
1728bool SIFoldOperandsImpl::tryFoldCndMask(MachineInstr &
MI)
const {
1729 unsigned Opc =
MI.getOpcode();
1730 if (
Opc != AMDGPU::V_CNDMASK_B32_e32 &&
Opc != AMDGPU::V_CNDMASK_B32_e64 &&
1731 Opc != AMDGPU::V_CNDMASK_B64_PSEUDO)
1734 MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
1735 MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
1737 std::optional<int64_t> Src1Imm =
TII->getImmOrMaterializedImm(*Src1);
1741 std::optional<int64_t> Src0Imm =
TII->getImmOrMaterializedImm(*Src0);
1742 if (!Src0Imm || *Src0Imm != *Src1Imm)
1747 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1_modifiers);
1749 AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src0_modifiers);
1750 if ((Src1ModIdx != -1 &&
MI.getOperand(Src1ModIdx).getImm() != 0) ||
1751 (Src0ModIdx != -1 &&
MI.getOperand(Src0ModIdx).getImm() != 0))
1757 int Src2Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src2);
1759 MI.removeOperand(Src2Idx);
1760 MI.removeOperand(AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::src1));
1761 if (Src1ModIdx != -1)
1762 MI.removeOperand(Src1ModIdx);
1763 if (Src0ModIdx != -1)
1764 MI.removeOperand(Src0ModIdx);
1765 TII->mutateAndCleanupImplicit(
MI, NewDesc);
1770bool SIFoldOperandsImpl::tryFoldZeroHighBits(MachineInstr &
MI)
const {
1771 if (
MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
1772 MI.getOpcode() != AMDGPU::V_AND_B32_e32)
1775 std::optional<int64_t> Src0Imm =
1776 TII->getImmOrMaterializedImm(
MI.getOperand(1));
1777 if (!Src0Imm || *Src0Imm != 0xffff || !
MI.getOperand(2).isReg())
1781 MachineInstr *SrcDef = MRI->
getVRegDef(Src1);
1787 if (!
MI.getOperand(2).isKill())
1789 MI.eraseFromParent();
1793bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &
MI,
1794 const FoldableDef &OpToFold)
const {
1798 SmallVector<MachineInstr *, 4> CopiesToReplace;
1800 MachineOperand &Dst =
MI.getOperand(0);
1803 if (OpToFold.isImm()) {
1814 if (tryConstantFoldOp(&
UseMI)) {
1823 for (
auto *U : UsesToProcess) {
1824 MachineInstr *
UseMI =
U->getParent();
1826 FoldableDef SubOpToFold = OpToFold.getWithSubReg(*
TRI,
U->getSubReg());
1831 if (CopiesToReplace.
empty() && FoldList.
empty())
1834 MachineFunction *MF =
MI.getMF();
1836 for (MachineInstr *Copy : CopiesToReplace)
1837 Copy->addImplicitDefUseOperands(*MF);
1839 SetVector<MachineInstr *> ConstantFoldCandidates;
1840 for (FoldCandidate &Fold : FoldList) {
1841 assert(!Fold.isReg() || Fold.Def.OpToFold);
1842 if (Fold.isReg() && Fold.getReg().isVirtual()) {
1844 const MachineInstr *
DefMI = Fold.Def.DefMI;
1852 assert(Fold.Def.OpToFold && Fold.isReg());
1859 <<
static_cast<int>(Fold.UseOpNo) <<
" of "
1863 ConstantFoldCandidates.
insert(Fold.UseMI);
1865 }
else if (Fold.Commuted) {
1867 TII->commuteInstruction(*Fold.UseMI,
false);
1871 for (MachineInstr *
MI : ConstantFoldCandidates) {
1872 if (tryConstantFoldOp(
MI)) {
1882bool SIFoldOperandsImpl::foldCopyToAGPRRegSequence(MachineInstr *CopyMI)
const {
1887 const TargetRegisterClass *DefRC =
1889 if (!
TRI->isAGPRClass(DefRC))
1901 DenseMap<TargetInstrInfo::RegSubRegPair, Register> VGPRCopies;
1903 const TargetRegisterClass *UseRC =
1910 unsigned NumFoldable = 0;
1912 for (
unsigned I = 1;
I != NumRegSeqOperands;
I += 2) {
1928 const TargetRegisterClass *DestSuperRC =
TRI->getMatchingSuperRegClass(
1929 DefRC, &AMDGPU::AGPR_32RegClass, SubRegIdx);
1938 const TargetRegisterClass *InputRC =
1948 const TargetRegisterClass *MatchRC =
1949 TRI->getMatchingSuperRegClass(DefRC, InputRC, SubRegIdx);
1960 if (NumFoldable == 0)
1963 CopyMI->
setDesc(
TII->get(AMDGPU::REG_SEQUENCE));
1967 for (
auto [Def, DestSubIdx] : NewDefs) {
1968 if (!
Def->isReg()) {
1972 BuildMI(
MBB, CopyMI,
DL,
TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), Tmp)
1977 Def->setIsKill(
false);
1979 Register &VGPRCopy = VGPRCopies[Src];
1981 const TargetRegisterClass *VGPRUseSubRC =
1982 TRI->getSubRegisterClass(UseRC, DestSubIdx);
1991 const TargetRegisterClass *SubRC =
2007 B.addImm(DestSubIdx);
2014bool SIFoldOperandsImpl::tryFoldFoldableCopy(
2015 MachineInstr &
MI, MachineOperand *&CurrentKnownM0Val)
const {
2019 if (DstReg == AMDGPU::M0) {
2020 MachineOperand &NewM0Val =
MI.getOperand(1);
2021 if (CurrentKnownM0Val && CurrentKnownM0Val->
isIdenticalTo(NewM0Val)) {
2022 MI.eraseFromParent();
2033 MachineOperand *OpToFoldPtr;
2034 if (
MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
2036 if (
TII->hasAnyModifiersSet(
MI))
2038 OpToFoldPtr = &
MI.getOperand(2);
2040 OpToFoldPtr = &
MI.getOperand(1);
2041 MachineOperand &OpToFold = *OpToFoldPtr;
2045 if (!FoldingImm && !OpToFold.
isReg())
2050 !
TRI->isConstantPhysReg(OpToFold.
getReg()))
2062 const TargetRegisterClass *DstRC =
2079 if (
MI.getOpcode() == AMDGPU::COPY && OpToFold.
isReg() &&
2081 if (DstRC == &AMDGPU::SReg_32RegClass &&
2090 if (OpToFold.
isReg() &&
MI.isCopy() && !
MI.getOperand(1).getSubReg()) {
2091 if (foldCopyToAGPRRegSequence(&
MI))
2095 FoldableDef
Def(OpToFold, DstRC);
2096 bool Changed = foldInstOperand(
MI, Def);
2103 auto *InstToErase = &
MI;
2105 auto &SrcOp = InstToErase->getOperand(1);
2107 InstToErase->eraseFromParent();
2109 InstToErase =
nullptr;
2113 if (!InstToErase || !
TII->isFoldableCopy(*InstToErase))
2117 if (InstToErase && InstToErase->isRegSequence() &&
2119 InstToErase->eraseFromParent();
2129 return OpToFold.
isReg() &&
2130 foldCopyToVGPROfScalarAddOfFrameIndex(DstReg, OpToFold.
getReg(),
MI);
2135const MachineOperand *
2136SIFoldOperandsImpl::isClamp(
const MachineInstr &
MI)
const {
2137 unsigned Op =
MI.getOpcode();
2139 case AMDGPU::V_MAX_F32_e64:
2140 case AMDGPU::V_MAX_F16_e64:
2141 case AMDGPU::V_MAX_F16_t16_e64:
2142 case AMDGPU::V_MAX_F16_fake16_e64:
2143 case AMDGPU::V_MAX_F64_e64:
2144 case AMDGPU::V_MAX_NUM_F64_e64:
2145 case AMDGPU::V_PK_MAX_F16:
2146 case AMDGPU::V_MAX_BF16_PSEUDO_e64:
2147 case AMDGPU::V_PK_MAX_NUM_BF16: {
2148 if (
MI.mayRaiseFPException())
2151 if (!
TII->getNamedOperand(
MI, AMDGPU::OpName::clamp)->getImm())
2155 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2156 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2160 Src0->
getSubReg() != AMDGPU::NoSubRegister)
2164 if (
TII->hasModifiersSet(
MI, AMDGPU::OpName::omod))
2168 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0_modifiers)->getImm();
2170 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1_modifiers)->getImm();
2174 unsigned UnsetMods =
2175 (
Op == AMDGPU::V_PK_MAX_F16 ||
Op == AMDGPU::V_PK_MAX_NUM_BF16)
2178 if (Src0Mods != UnsetMods && Src1Mods != UnsetMods)
2188bool SIFoldOperandsImpl::tryFoldClamp(MachineInstr &
MI) {
2189 const MachineOperand *ClampSrc = isClamp(
MI);
2205 if (
Def->mayRaiseFPException())
2208 MachineOperand *DefClamp =
TII->getNamedOperand(*Def, AMDGPU::OpName::clamp);
2212 LLVM_DEBUG(
dbgs() <<
"Folding clamp " << *DefClamp <<
" into " << *Def);
2218 Register MIDstReg =
MI.getOperand(0).getReg();
2219 if (
TRI->isSGPRReg(*MRI, DefReg)) {
2228 MI.eraseFromParent();
2233 if (
TII->convertToThreeAddress(*Def,
nullptr,
nullptr))
2234 Def->eraseFromParent();
2241 case AMDGPU::V_MUL_F64_e64:
2242 case AMDGPU::V_MUL_F64_pseudo_e64: {
2244 case 0x3fe0000000000000:
2246 case 0x4000000000000000:
2248 case 0x4010000000000000:
2254 case AMDGPU::V_MUL_F32_e64: {
2255 switch (
static_cast<uint32_t>(Val)) {
2266 case AMDGPU::V_MUL_F16_e64:
2267 case AMDGPU::V_MUL_F16_t16_e64:
2268 case AMDGPU::V_MUL_F16_fake16_e64: {
2269 switch (
static_cast<uint16_t>(Val)) {
2288std::pair<const MachineOperand *, int>
2289SIFoldOperandsImpl::isOMod(
const MachineInstr &
MI)
const {
2290 unsigned Op =
MI.getOpcode();
2292 case AMDGPU::V_MUL_F64_e64:
2293 case AMDGPU::V_MUL_F64_pseudo_e64:
2294 case AMDGPU::V_MUL_F32_e64:
2295 case AMDGPU::V_MUL_F16_t16_e64:
2296 case AMDGPU::V_MUL_F16_fake16_e64:
2297 case AMDGPU::V_MUL_F16_e64: {
2299 if ((
Op == AMDGPU::V_MUL_F32_e64 &&
2301 ((
Op == AMDGPU::V_MUL_F64_e64 ||
Op == AMDGPU::V_MUL_F64_pseudo_e64 ||
2302 Op == AMDGPU::V_MUL_F16_e64 ||
Op == AMDGPU::V_MUL_F16_t16_e64 ||
2303 Op == AMDGPU::V_MUL_F16_fake16_e64) &&
2306 MI.mayRaiseFPException())
2309 const MachineOperand *RegOp =
nullptr;
2310 const MachineOperand *ImmOp =
nullptr;
2311 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2312 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2313 if (Src0->
isImm()) {
2316 }
else if (Src1->
isImm()) {
2324 TII->hasModifiersSet(
MI, AMDGPU::OpName::src0_modifiers) ||
2325 TII->hasModifiersSet(
MI, AMDGPU::OpName::src1_modifiers) ||
2326 TII->hasModifiersSet(
MI, AMDGPU::OpName::omod) ||
2327 TII->hasModifiersSet(
MI, AMDGPU::OpName::clamp))
2330 return std::pair(RegOp, OMod);
2332 case AMDGPU::V_ADD_F64_e64:
2333 case AMDGPU::V_ADD_F64_pseudo_e64:
2334 case AMDGPU::V_ADD_F32_e64:
2335 case AMDGPU::V_ADD_F16_e64:
2336 case AMDGPU::V_ADD_F16_t16_e64:
2337 case AMDGPU::V_ADD_F16_fake16_e64: {
2339 if ((
Op == AMDGPU::V_ADD_F32_e64 &&
2341 ((
Op == AMDGPU::V_ADD_F64_e64 ||
Op == AMDGPU::V_ADD_F64_pseudo_e64 ||
2342 Op == AMDGPU::V_ADD_F16_e64 ||
Op == AMDGPU::V_ADD_F16_t16_e64 ||
2343 Op == AMDGPU::V_ADD_F16_fake16_e64) &&
2348 const MachineOperand *Src0 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
2349 const MachineOperand *Src1 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
2353 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::src0_modifiers) &&
2354 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::src1_modifiers) &&
2355 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::clamp) &&
2356 !
TII->hasModifiersSet(
MI, AMDGPU::OpName::omod))
2367bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &
MI) {
2368 const MachineOperand *RegOp;
2370 std::tie(RegOp, OMod) = isOMod(
MI);
2372 RegOp->
getSubReg() != AMDGPU::NoSubRegister ||
2377 MachineOperand *DefOMod =
TII->getNamedOperand(*Def, AMDGPU::OpName::omod);
2381 if (
Def->mayRaiseFPException())
2386 if (
TII->hasModifiersSet(*Def, AMDGPU::OpName::clamp))
2396 MI.eraseFromParent();
2401 if (
TII->convertToThreeAddress(*Def,
nullptr,
nullptr))
2402 Def->eraseFromParent();
2409bool SIFoldOperandsImpl::tryFoldRegSequence(MachineInstr &
MI) {
2411 auto Reg =
MI.getOperand(0).getReg();
2413 if (!ST->hasGFX90AInsts() || !
TRI->isVGPR(*MRI,
Reg) ||
2418 if (!getRegSeqInit(Defs,
Reg))
2421 for (
auto &[
Op, SubIdx] : Defs) {
2424 if (
TRI->isAGPR(*MRI,
Op->getReg()))
2427 const MachineInstr *SubDef = MRI->
getVRegDef(
Op->getReg());
2435 MachineInstr *
UseMI =
Op->getParent();
2444 if (
Op->getSubReg())
2449 const TargetRegisterClass *OpRC =
TII->getRegClass(InstDesc,
OpIdx);
2450 if (!OpRC || !
TRI->isVectorSuperClass(OpRC))
2456 TII->get(AMDGPU::REG_SEQUENCE), Dst);
2458 for (
auto &[Def, SubIdx] : Defs) {
2459 Def->setIsKill(
false);
2460 if (
TRI->isAGPR(*MRI,
Def->getReg())) {
2473 RS->eraseFromParent();
2482 MI.eraseFromParent();
2490 Register &OutReg,
unsigned &OutSubReg) {
2500 if (
TRI.isAGPR(MRI, CopySrcReg)) {
2501 OutReg = CopySrcReg;
2510 if (!CopySrcDef || !CopySrcDef->
isCopy())
2517 OtherCopySrc.
getSubReg() != AMDGPU::NoSubRegister ||
2518 !
TRI.isAGPR(MRI, OtherCopySrcReg))
2521 OutReg = OtherCopySrcReg;
2555bool SIFoldOperandsImpl::tryFoldPhiAGPR(MachineInstr &
PHI) {
2559 if (!
TRI->isVGPR(*MRI, PhiOut))
2564 const TargetRegisterClass *ARC =
nullptr;
2565 for (
unsigned K = 1;
K <
PHI.getNumExplicitOperands();
K += 2) {
2566 MachineOperand &MO =
PHI.getOperand(K);
2568 if (!Copy || !
Copy->isCopy())
2572 unsigned AGPRRegMask = AMDGPU::NoSubRegister;
2576 const TargetRegisterClass *CopyInRC = MRI->
getRegClass(AGPRSrc);
2577 if (
const auto *SubRC =
TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))
2588 bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);
2592 for (
unsigned K = 1;
K <
PHI.getNumExplicitOperands();
K += 2) {
2593 MachineOperand &MO =
PHI.getOperand(K);
2597 MachineBasicBlock *InsertMBB =
nullptr;
2600 unsigned CopyOpc = AMDGPU::COPY;
2605 if (
Def->isCopy()) {
2607 unsigned AGPRSubReg = AMDGPU::NoSubRegister;
2620 MachineOperand &CopyIn =
Def->getOperand(1);
2623 CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
2626 InsertMBB =
Def->getParent();
2634 MachineInstr *
MI =
BuildMI(*InsertMBB, InsertPt,
PHI.getDebugLoc(),
2635 TII->get(CopyOpc), NewReg)
2645 PHI.getOperand(0).setReg(NewReg);
2651 TII->get(AMDGPU::COPY), PhiOut)
2659bool SIFoldOperandsImpl::tryFoldLoad(MachineInstr &
MI) {
2661 if (!ST->hasGFX90AInsts() ||
MI.getNumExplicitDefs() != 1)
2664 MachineOperand &
Def =
MI.getOperand(0);
2681 while (!
Users.empty()) {
2682 const MachineInstr *
I =
Users.pop_back_val();
2683 if (!
I->isCopy() && !
I->isRegSequence())
2685 Register DstReg =
I->getOperand(0).getReg();
2689 if (
TRI->isAGPR(*MRI, DstReg))
2693 Users.push_back(&U);
2696 const TargetRegisterClass *RC = MRI->
getRegClass(DefReg);
2698 if (!
TII->isOperandLegal(
MI, 0, &Def)) {
2703 while (!MoveRegs.
empty()) {
2745bool SIFoldOperandsImpl::tryOptimizeAGPRPhis(MachineBasicBlock &
MBB) {
2748 if (ST->hasGFX90AInsts())
2752 DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>
2755 for (
auto &
MI :
MBB) {
2759 if (!
TRI->isAGPR(*MRI,
MI.getOperand(0).getReg()))
2762 for (
unsigned K = 1;
K <
MI.getNumOperands();
K += 2) {
2763 MachineOperand &PhiMO =
MI.getOperand(K);
2773 for (
const auto &[Entry, MOs] : RegToMO) {
2774 if (MOs.size() == 1)
2779 MachineBasicBlock *DefMBB =
Def->getParent();
2783 const TargetRegisterClass *ARC =
getRegOpRC(*MRI, *
TRI, *MOs.front());
2786 MachineInstr *VGPRCopy =
2788 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)
2794 TII->get(AMDGPU::COPY), TempAGPR)
2798 for (MachineOperand *MO : MOs) {
2810bool SIFoldOperandsImpl::run(MachineFunction &MF) {
2816 MFI = MF.
getInfo<SIMachineFunctionInfo>();
2826 MachineOperand *CurrentKnownM0Val =
nullptr;
2830 if (tryFoldZeroHighBits(
MI)) {
2835 if (
MI.isRegSequence() && tryFoldRegSequence(
MI)) {
2840 if (
MI.isPHI() && tryFoldPhiAGPR(
MI)) {
2845 if (
MI.mayLoad() && tryFoldLoad(
MI)) {
2850 if (
TII->isFoldableCopy(
MI)) {
2851 Changed |= tryFoldFoldableCopy(
MI, CurrentKnownM0Val);
2856 if (CurrentKnownM0Val &&
MI.modifiesRegister(AMDGPU::M0,
TRI))
2857 CurrentKnownM0Val =
nullptr;
2875 bool Changed = SIFoldOperandsImpl().run(MF);
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat)
Updates the operand at Idx in instruction Inst with the result of instruction Mat.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
AMD GCN specific subclass of TargetSubtarget.
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
iv Induction Variable Users
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static unsigned macToMad(unsigned Opc)
static bool isAGPRCopy(const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI, const MachineInstr &Copy, Register &OutReg, unsigned &OutSubReg)
Checks whether Copy is an AGPR -> VGPR copy.
static void appendFoldCandidate(SmallVectorImpl< FoldCandidate > &FoldList, FoldCandidate &&Entry)
static const TargetRegisterClass * getRegOpRC(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineOperand &MO)
static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result, uint32_t LHS, uint32_t RHS)
static int getOModValue(unsigned Opc, int64_t Val)
static unsigned getMovOpc(bool IsScalar)
static MachineOperand * lookUpCopyChain(const SIInstrInfo &TII, const MachineRegisterInfo &MRI, Register SrcReg)
static bool checkImmOpForPKF32InstrReplicatesLower32BitsOfScalarOperand(const FoldableDef &OpToFold)
static bool isPKF32InstrReplicatesLower32BitsOfScalarOperand(const GCNSubtarget *ST, MachineInstr *MI, unsigned OpNo)
Interface definition for SIInstrInfo.
Interface definition for SIRegisterInfo.
static int Lookup(ArrayRef< TableEntry > Table, unsigned Opcode)
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool hasDOTOpSelHazard() const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
const HexagonRegisterInfo & getRegisterInfo() const
ArrayRef< MCOperandInfo > operands() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
This holds information about one operand of a machine instruction, indicating the register class for ...
uint8_t OperandType
Information about the type of the operand.
An RAII based helper class to modify MachineFunctionProperties when running pass.
LLVM_ABI iterator SkipPHIsLabelsAndDebug(iterator I, Register Reg=Register(), bool SkipPseudoOp=true)
Return the first instruction in MBB after I that is not a PHI, label or debug.
LLVM_ABI LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, MCRegister Reg, const_iterator Before, unsigned Neighborhood=10) const
Return whether (physical) register Reg has been defined and not killed as of just before Before.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
LivenessQueryResult
Possible outcome of a register liveness query to computeRegisterLiveness()
@ LQR_Dead
Register is known to be fully dead.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
unsigned getOperandNo(const_mop_iterator I) const
Returns the number of the operand iterator I points to.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
mop_range implicit_operands()
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
void clearFlag(MIFlag Flag)
clearFlag - Clear a MI flag.
bool isRegSequence() const
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
MachineOperand * mop_iterator
iterator/begin/end - Iterate over all operands of a machine instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
LLVM_ABI void substVirtReg(Register Reg, unsigned SubIdx, const TargetRegisterInfo &)
substVirtReg - Substitute the current register with the virtual subregister Reg:SubReg.
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
LLVM_ABI void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
LLVM_ABI void substPhysReg(MCRegister Reg, const TargetRegisterInfo &)
substPhysReg - Substitute the current register with the physical register Reg, taking any existing Su...
static MachineOperand CreateImm(int64_t Val)
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
@ MO_Immediate
Immediate operand.
@ MO_GlobalAddress
Address of a global value.
@ MO_FrameIndex
Abstract Stack Frame Index.
@ MO_Register
Register operand.
static MachineOperand CreateFI(int Idx)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
use_nodbg_iterator use_nodbg_begin(Register RegNo) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI void clearKillFlags(Register Reg) const
clearKillFlags - Iterate over all the uses of the given register and clear the kill flag from the Mac...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI bool hasOneNonDBGUser(Register RegNo) const
hasOneNonDBGUser - Return true if there is exactly one non-Debug instruction using the specified regis...
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
static bool hasSameClamp(const MachineInstr &A, const MachineInstr &B)
static std::optional< int64_t > extractSubregFromImm(int64_t ImmVal, unsigned SubRegIndex)
Return the extracted immediate value in a subregister use from a constant materialized in a super reg...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
SIModeRegisterDefaults getMode() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
static const unsigned CommuteAnyOperandIndex
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
LLVM_READONLY int32_t getMFMAEarlyClobberOp(uint32_t Opcode)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
@ OPERAND_REG_INLINE_C_FP64
@ OPERAND_REG_INLINE_C_V2BF16
@ OPERAND_REG_IMM_V2INT16
@ OPERAND_REG_INLINE_C_INT64
@ OPERAND_REG_IMM_NOINLINE_V2FP16
@ OPERAND_REG_INLINE_C_V2FP16
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
@ OPERAND_REG_INLINE_AC_FP32
@ OPERAND_REG_INLINE_C_FP32
@ OPERAND_REG_INLINE_C_INT32
@ OPERAND_REG_INLINE_C_V2INT16
@ OPERAND_REG_INLINE_AC_FP64
LLVM_READONLY int32_t getFlatScratchInstSSfromSV(uint32_t Opcode)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O)
Create RegSubRegPair from a register MachineOperand.
MachineBasicBlock::instr_iterator getBundleStart(MachineBasicBlock::instr_iterator I)
Returns an iterator to the first instruction in the bundle containing I.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI, const MachineInstr &UseMI)
Return false if EXEC is not changed between the def of VReg at DefMI and the use at UseMI.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createSIFoldOperandsLegacyPass()
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
char & SIFoldOperandsLegacyID
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
DenormalModeKind Output
Denormal flushing mode for floating point instruction results in the default floating point environme...
DenormalMode FP64FP16Denormals
If this is set, neither input or output denormals are flushed for both f64 and f16/v2f16 instructions...
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
DenormalMode FP32Denormals
If this is set, neither input or output denormals are flushed for most f32 instructions.