58#define DEBUG_TYPE "machinelicm"
62 cl::desc(
"MachineLICM should avoid speculation"),
67 cl::desc(
"MachineLICM should hoist even cheap instructions"),
83 cl::desc(
"Do not hoist instructions if target"
84 "block is N times hotter than the source."),
91 cl::desc(
"Disable hoisting instructions to"
95 "disable the feature"),
97 "enable the feature when using profile data"),
99 "enable the feature with/wo profile data")));
102 "Number of machine instructions hoisted out of loops");
104 "Number of instructions hoisted in low reg pressure situation");
106 "Number of high latency instructions hoisted");
108 "Number of hoisted machine instructions CSEed");
110 "Number of machine instructions hoisted out of loops post regalloc");
112 "Number of stores of const phys reg hoisted out of loops");
114 "Number of instructions not hoisted due to block frequency");
117 enum HoistResult { NotHoisted = 1, Hoisted = 2, ErasedMI = 4 };
119 class MachineLICMImpl {
120 const TargetInstrInfo *TII =
nullptr;
121 const TargetLoweringBase *TLI =
nullptr;
122 const TargetRegisterInfo *TRI =
nullptr;
123 const MachineFrameInfo *MFI =
nullptr;
124 MachineRegisterInfo *MRI =
nullptr;
125 TargetSchedModel SchedModel;
126 bool PreRegAlloc =
false;
127 bool HasProfileData =
false;
133 MachineBlockFrequencyInfo *MBFI =
nullptr;
134 MachineLoopInfo *MLI =
nullptr;
135 MachineDomTreeUpdater *MDTU =
nullptr;
138 bool Changed =
false;
139 bool FirstInLoop =
false;
143 SmallDenseMap<MachineLoop *, bool> AllowedToHoistLoads;
146 DenseMap<MachineLoop *, SmallVector<MachineBasicBlock *, 8>> ExitBlockMap;
148 bool isExitBlock(MachineLoop *CurLoop,
const MachineBasicBlock *
MBB) {
149 auto [It,
Inserted] = ExitBlockMap.try_emplace(CurLoop);
151 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
153 It->second = std::move(ExitBlocks);
159 SmallDenseSet<Register> RegSeen;
160 SmallVector<unsigned, 8> RegPressure;
164 SmallVector<unsigned, 8> RegLimit;
170 DenseMap<MachineBasicBlock *,
171 DenseMap<unsigned, std::vector<MachineInstr *>>>
183 unsigned SpeculationState = SpeculateUnknown;
186 MachineLICMImpl(
bool PreRegAlloc,
Pass *LegacyPass,
188 : PreRegAlloc(PreRegAlloc), LegacyPass(LegacyPass), MFAM(MFAM) {
189 assert((LegacyPass || MFAM) &&
"LegacyPass or MFAM must be provided");
190 assert(!(LegacyPass && MFAM) &&
191 "LegacyPass and MFAM cannot be provided at the same time");
194 bool run(MachineFunction &MF);
196 void releaseMemory() {
202 ExitBlockMap.clear();
207 struct CandidateInfo {
212 CandidateInfo(MachineInstr *mi,
Register def,
int fi)
213 : MI(mi), Def(def), FI(fi) {}
216 void HoistRegionPostRA(MachineLoop *CurLoop);
218 void HoistPostRA(MachineInstr *
MI,
Register Def, MachineLoop *CurLoop);
220 void ProcessMI(MachineInstr *
MI, BitVector &RUDefs, BitVector &RUClobbers,
221 SmallDenseSet<int> &StoredFIs,
222 SmallVectorImpl<CandidateInfo> &Candidates,
223 MachineLoop *CurLoop);
225 void AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop);
227 bool IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop);
229 bool IsLoopInvariantInst(MachineInstr &
I, MachineLoop *CurLoop);
231 bool HasLoopPHIUse(
const MachineInstr *
MI, MachineLoop *CurLoop);
233 bool HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
Register Reg,
234 MachineLoop *CurLoop)
const;
236 bool IsCheapInstruction(MachineInstr &
MI)
const;
238 bool CanCauseHighRegPressure(
const SmallDenseMap<unsigned, int> &
Cost,
241 void UpdateBackTraceRegPressure(
const MachineInstr *
MI);
243 bool IsProfitableToHoist(MachineInstr &
MI, MachineLoop *CurLoop);
245 bool IsGuaranteedToExecute(MachineBasicBlock *BB, MachineLoop *CurLoop);
247 void EnterScope(MachineBasicBlock *
MBB);
249 void ExitScope(MachineBasicBlock *
MBB);
251 void ExitScopeIfDone(
253 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
254 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
258 void InitRegPressure(MachineBasicBlock *BB);
260 SmallDenseMap<unsigned, int> calcRegisterCost(
const MachineInstr *
MI,
262 bool ConsiderUnseenAsDef);
264 void UpdateRegPressure(
const MachineInstr *
MI,
265 bool ConsiderUnseenAsDef =
false);
267 MachineInstr *ExtractHoistableLoad(MachineInstr *
MI, MachineLoop *CurLoop);
269 MachineInstr *LookForDuplicate(
const MachineInstr *
MI,
270 std::vector<MachineInstr *> &PrevMIs);
273 EliminateCSE(MachineInstr *
MI,
274 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI);
276 bool MayCSE(MachineInstr *
MI);
278 unsigned Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
279 MachineLoop *CurLoop);
281 void InitCSEMap(MachineBasicBlock *BB);
283 void InitializeLoadsHoistableLoops();
285 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
286 MachineBasicBlock *TgtBlock);
287 MachineBasicBlock *getOrCreatePreheader(MachineLoop *CurLoop);
294 MachineLICMBase(
char &
ID,
bool PreRegAlloc)
295 : MachineFunctionPass(
ID), PreRegAlloc(PreRegAlloc) {}
297 bool runOnMachineFunction(MachineFunction &MF)
override;
299 void getAnalysisUsage(AnalysisUsage &AU)
const override {
302 AU.
addRequired<MachineBlockFrequencyInfoWrapperPass>();
310 class MachineLICM :
public MachineLICMBase {
313 MachineLICM() : MachineLICMBase(ID,
false) {
318 class EarlyMachineLICM :
public MachineLICMBase {
321 EarlyMachineLICM() : MachineLICMBase(ID,
true) {
329char EarlyMachineLICM::ID;
335 "Machine Loop Invariant Code Motion",
false,
false)
344 "Early Machine Loop Invariant Code Motion",
false,
false)
350 "Early Machine Loop Invariant Code Motion",
false,
false)
353 if (skipFunction(MF.getFunction()))
356 MachineLICMImpl Impl(PreRegAlloc,
this,
nullptr);
360#define GET_RESULT(RESULT, GETTER, INFIX) \
362 ? &LegacyPass->getAnalysis<RESULT##INFIX##WrapperPass>().GETTER() \
363 : &MFAM->getResult<RESULT##Analysis>(MF))
372 MachineDomTreeUpdater::UpdateStrategy::Lazy);
381 TII = ST.getInstrInfo();
382 TLI = ST.getTargetLowering();
383 TRI = ST.getRegisterInfo();
386 SchedModel.
init(&ST);
398 unsigned NumRPS =
TRI->getNumRegPressureSets();
399 RegPressure.resize(NumRPS);
402 for (
unsigned i = 0, e = NumRPS; i != e; ++i)
403 RegLimit[i] =
TRI->getRegPressureSetLimit(MF, i);
407 InitializeLoadsHoistableLoops();
410 while (!Worklist.
empty()) {
414 HoistRegionPostRA(CurLoop);
420 HoistOutOfLoop(
N, CurLoop);
436 if (
MI->memoperands_empty())
439 if (!
MemOp->isStore() || !
MemOp->getPseudoValue())
443 if (
Value->getFrameIndex() == FI)
484 const unsigned NumRegs =
TRI.getNumRegs();
485 const unsigned MaskWords = (NumRegs + 31) / 32;
486 for (
unsigned K = 0; K < MaskWords; ++K) {
488 for (
unsigned Bit = 0; Bit < 32; ++Bit) {
489 const unsigned PhysReg = (K * 32) + Bit;
490 if (PhysReg == NumRegs)
493 if (PhysReg && !((Word >> Bit) & 1)) {
494 for (MCRegUnit Unit :
TRI.regunits(PhysReg))
495 RUsFromRegsNotInMask.
set(
static_cast<unsigned>(Unit));
500 RUs |= RUsFromRegsNotInMask;
505void MachineLICMImpl::ProcessMI(MachineInstr *
MI, BitVector &RUDefs,
506 BitVector &RUClobbers,
507 SmallDenseSet<int> &StoredFIs,
508 SmallVectorImpl<CandidateInfo> &Candidates,
509 MachineLoop *CurLoop) {
510 bool RuledOut =
false;
511 bool HasNonInvariantUse =
false;
513 for (
const MachineOperand &MO :
MI->operands()) {
516 int FI = MO.getIndex();
517 if (!StoredFIs.
count(FI) &&
521 HasNonInvariantUse =
true;
527 if (MO.isRegMask()) {
540 if (!HasNonInvariantUse) {
541 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
544 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
545 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
546 HasNonInvariantUse =
true;
565 for (MCRegUnit Unit :
TRI->regunits(
Reg)) {
566 if (RUDefs.
test(
static_cast<unsigned>(Unit))) {
567 RUClobbers.
set(
static_cast<unsigned>(Unit));
569 }
else if (RUClobbers.
test(
static_cast<unsigned>(Unit))) {
575 RUDefs.
set(
static_cast<unsigned>(Unit));
581 if (Def && !RuledOut) {
582 int FI = std::numeric_limits<int>::min();
583 if ((!HasNonInvariantUse && IsLICMCandidate(*
MI, CurLoop)) ||
591void MachineLICMImpl::HoistRegionPostRA(MachineLoop *CurLoop) {
592 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
596 unsigned NumRegUnits =
TRI->getNumRegUnits();
597 BitVector RUDefs(NumRegUnits);
598 BitVector RUClobbers(NumRegUnits);
601 SmallDenseSet<int> StoredFIs;
605 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
609 if (
ML &&
ML->getHeader()->isEHPad())
continue;
614 for (
const auto &LI : BB->liveins()) {
615 for (MCRegUnit Unit :
TRI->regunits(LI.PhysReg))
616 RUDefs.
set(
static_cast<unsigned>(Unit));
620 if (
const uint32_t *Mask = BB->getBeginClobberMask(
TRI))
625 const MachineFunction &MF = *BB->getParent();
629 for (MCRegUnit Unit :
TRI->regunits(
Reg))
630 RUClobbers.
set(
static_cast<unsigned>(Unit));
632 for (MCRegUnit Unit :
TRI->regunits(
Reg))
633 RUClobbers.
set(
static_cast<unsigned>(Unit));
636 SpeculationState = SpeculateUnknown;
637 for (MachineInstr &
MI : *BB)
638 ProcessMI(&
MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
642 BitVector TermRUs(NumRegUnits);
644 if (TI != Preheader->
end()) {
645 for (
const MachineOperand &MO : TI->operands()) {
651 for (MCRegUnit Unit :
TRI->regunits(
Reg))
652 TermRUs.set(
static_cast<unsigned>(Unit));
664 for (CandidateInfo &Candidate : Candidates) {
665 if (Candidate.FI != std::numeric_limits<int>::min() &&
666 StoredFIs.
count(Candidate.FI))
671 for (MCRegUnit Unit :
TRI->regunits(Def)) {
672 if (RUClobbers.
test(
static_cast<unsigned>(Unit)) ||
673 TermRUs.test(
static_cast<unsigned>(Unit))) {
682 MachineInstr *
MI = Candidate.MI;
683 for (
const MachineOperand &MO :
MI->all_uses()) {
686 for (MCRegUnit Unit :
TRI->regunits(MO.getReg())) {
687 if (RUDefs.
test(
static_cast<unsigned>(Unit)) ||
688 RUClobbers.
test(
static_cast<unsigned>(Unit))) {
701 HoistPostRA(
MI, Candidate.Def, CurLoop);
707void MachineLICMImpl::AddToLiveIns(MCRegister
Reg, MachineLoop *CurLoop) {
708 for (MachineBasicBlock *BB : CurLoop->
getBlocks()) {
709 if (!BB->isLiveIn(
Reg))
711 for (MachineInstr &
MI : *BB) {
712 for (MachineOperand &MO :
MI.all_uses()) {
715 if (
TRI->regsOverlap(
Reg, MO.getReg()))
724void MachineLICMImpl::HoistPostRA(MachineInstr *
MI,
Register Def,
725 MachineLoop *CurLoop) {
735 MachineBasicBlock *
MBB =
MI->getParent();
741 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
747 AddToLiveIns(Def, CurLoop);
755bool MachineLICMImpl::IsGuaranteedToExecute(MachineBasicBlock *BB,
756 MachineLoop *CurLoop) {
757 if (SpeculationState != SpeculateUnknown)
758 return SpeculationState == SpeculateFalse;
762 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
764 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
766 SpeculationState = SpeculateTrue;
771 SpeculationState = SpeculateFalse;
775void MachineLICMImpl::EnterScope(MachineBasicBlock *
MBB) {
782void MachineLICMImpl::ExitScope(MachineBasicBlock *
MBB) {
790void MachineLICMImpl::ExitScopeIfDone(
792 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
793 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
794 if (OpenChildren[Node])
798 ExitScope(
Node->getBlock());
801 if (!Parent || --OpenChildren[Parent] != 0)
812 MachineLoop *CurLoop) {
813 MachineBasicBlock *Preheader = getOrCreatePreheader(CurLoop);
819 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
820 DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
824 while (!WorkList.
empty()) {
826 assert(Node &&
"Null dominator tree node?");
827 MachineBasicBlock *BB =
Node->getBlock();
832 if (
ML &&
ML->getHeader()->isEHPad())
840 unsigned NumChildren =
Node->getNumChildren();
848 OpenChildren[
Node] = NumChildren;
854 ParentMap[Child] =
Node;
866 InitRegPressure(Preheader);
870 MachineBasicBlock *
MBB =
Node->getBlock();
875 SpeculationState = SpeculateUnknown;
877 unsigned HoistRes = HoistResult::NotHoisted;
878 HoistRes = Hoist(&
MI, Preheader, CurLoop);
879 if (HoistRes & HoistResult::NotHoisted) {
883 for (MachineLoop *L = MLI->
getLoopFor(
MI.getParent()); L != CurLoop;
884 L =
L->getParentLoop())
887 while (!InnerLoopWorkList.
empty()) {
888 MachineLoop *InnerLoop = InnerLoopWorkList.
pop_back_val();
890 if (InnerLoopPreheader) {
891 HoistRes = Hoist(&
MI, InnerLoopPreheader, InnerLoop);
892 if (HoistRes & HoistResult::Hoisted)
898 if (HoistRes & HoistResult::ErasedMI)
901 UpdateRegPressure(&
MI);
905 ExitScopeIfDone(Node, OpenChildren, ParentMap);
916void MachineLICMImpl::InitRegPressure(MachineBasicBlock *BB) {
924 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
930 for (
const MachineInstr &
MI : *BB)
931 UpdateRegPressure(&
MI,
true);
935void MachineLICMImpl::UpdateRegPressure(
const MachineInstr *
MI,
936 bool ConsiderUnseenAsDef) {
937 auto Cost = calcRegisterCost(
MI,
true, ConsiderUnseenAsDef);
938 for (
const auto &[Class, Weight] :
Cost) {
939 if (
static_cast<int>(RegPressure[Class]) < -Weight)
952SmallDenseMap<unsigned, int>
953MachineLICMImpl::calcRegisterCost(
const MachineInstr *
MI,
bool ConsiderSeen,
954 bool ConsiderUnseenAsDef) {
955 SmallDenseMap<unsigned, int>
Cost;
956 if (
MI->isImplicitDef())
958 for (
unsigned i = 0, e =
MI->getDesc().getNumOperands(); i != e; ++i) {
959 const MachineOperand &MO =
MI->getOperand(i);
967 bool isNew = ConsiderSeen ? RegSeen.
insert(
Reg).second :
false;
968 const TargetRegisterClass *RC =
MRI->getRegClass(
Reg);
970 RegClassWeight
W =
TRI->getRegClassWeight(RC);
973 RCCost =
W.RegWeight;
976 if (isNew && !isKill && ConsiderUnseenAsDef)
978 RCCost =
W.RegWeight;
979 else if (!isNew && isKill)
980 RCCost = -
W.RegWeight;
984 const int *PS =
TRI->getRegClassPressureSets(RC);
985 for (; *PS != -1; ++PS)
994 assert(
MI.mayLoad() &&
"Expected MI that loads!");
998 if (
MI.memoperands_empty())
1003 if (PSV->isGOT() || PSV->isConstantPool())
1020 bool FoundCallerPresReg =
false;
1021 if (!
MI.mayStore() ||
MI.hasUnmodeledSideEffects() ||
1022 (
MI.getNumOperands() == 0))
1031 if (
Reg.isVirtual())
1033 if (
Reg.isVirtual())
1035 if (!
TRI->isCallerPreservedPhysReg(
Reg.asMCReg(), *
MI.getMF()))
1038 FoundCallerPresReg =
true;
1039 }
else if (!MO.
isImm()) {
1043 return FoundCallerPresReg;
1061 Register CopySrcReg =
MI.getOperand(1).getReg();
1065 if (!
TRI->isCallerPreservedPhysReg(CopySrcReg.
asMCReg(), *MF))
1068 Register CopyDstReg =
MI.getOperand(0).getReg();
1081bool MachineLICMImpl::IsLICMCandidate(MachineInstr &
I, MachineLoop *CurLoop) {
1083 bool DontMoveAcrossStore = !
HoistConstLoads || !AllowedToHoistLoads[CurLoop];
1084 if ((!
I.isSafeToMove(DontMoveAcrossStore)) &&
1097 !IsGuaranteedToExecute(
I.getParent(), CurLoop)) {
1106 if (
I.isConvergent())
1109 if (!
TII->shouldHoist(
I, CurLoop))
1116bool MachineLICMImpl::IsLoopInvariantInst(MachineInstr &
I,
1117 MachineLoop *CurLoop) {
1118 if (!IsLICMCandidate(
I, CurLoop)) {
1119 LLVM_DEBUG(
dbgs() <<
"LICM: Instruction not a LICM candidate\n");
1127bool MachineLICMImpl::HasLoopPHIUse(
const MachineInstr *
MI,
1128 MachineLoop *CurLoop) {
1131 MI = Work.pop_back_val();
1132 for (
const MachineOperand &MO :
MI->all_defs()) {
1136 for (MachineInstr &
UseMI :
MRI->use_instructions(
Reg)) {
1138 if (
UseMI.isPHI()) {
1152 Work.push_back(&
UseMI);
1155 }
while (!Work.empty());
1161bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &
MI,
unsigned DefIdx,
1163 MachineLoop *CurLoop)
const {
1164 if (
MRI->use_nodbg_empty(
Reg))
1167 for (MachineInstr &
UseMI :
MRI->use_nodbg_instructions(
Reg)) {
1168 if (
UseMI.isCopyLike())
1172 for (
unsigned i = 0, e =
UseMI.getNumOperands(); i != e; ++i) {
1173 const MachineOperand &MO =
UseMI.getOperand(i);
1180 if (
TII->hasHighOperandLatency(SchedModel,
MRI,
MI, DefIdx,
UseMI, i))
1193bool MachineLICMImpl::IsCheapInstruction(MachineInstr &
MI)
const {
1197 bool isCheap =
false;
1198 unsigned NumDefs =
MI.getDesc().getNumDefs();
1199 for (
unsigned i = 0, e =
MI.getNumOperands(); NumDefs && i != e; ++i) {
1200 MachineOperand &DefMO =
MI.getOperand(i);
1208 if (!
TII->hasLowDefLatency(SchedModel,
MI, i))
1218bool MachineLICMImpl::CanCauseHighRegPressure(
1219 const SmallDenseMap<unsigned, int> &
Cost,
bool CheapInstr) {
1220 for (
const auto &[Class, Weight] :
Cost) {
1224 int Limit = RegLimit[
Class];
1231 for (
const auto &RP : BackTrace)
1232 if (
static_cast<int>(RP[Class]) + Weight >= Limit)
1242void MachineLICMImpl::UpdateBackTraceRegPressure(
const MachineInstr *
MI) {
1245 auto Cost = calcRegisterCost(
MI,
false,
1249 for (
auto &RP : BackTrace)
1250 for (
const auto &[Class, Weight] :
Cost)
1256bool MachineLICMImpl::IsProfitableToHoist(MachineInstr &
MI,
1257 MachineLoop *CurLoop) {
1258 if (
MI.isImplicitDef())
1276 bool CheapInstr = IsCheapInstruction(
MI);
1277 bool CreatesCopy = HasLoopPHIUse(&
MI, CurLoop);
1280 if (CheapInstr && CreatesCopy) {
1287 if (
TII->isTriviallyReMaterializable(
MI))
1292 for (
unsigned i = 0, e =
MI.getDesc().getNumOperands(); i != e; ++i) {
1293 const MachineOperand &MO =
MI.getOperand(i);
1299 if (MO.
isDef() && HasHighOperandLatency(
MI, i,
Reg, CurLoop)) {
1312 auto Cost = calcRegisterCost(&
MI,
false,
1317 if (!CanCauseHighRegPressure(
Cost, CheapInstr)) {
1333 (!IsGuaranteedToExecute(
MI.getParent(), CurLoop) && !MayCSE(&
MI))) {
1341 if (
MI.isCopy() ||
MI.isRegSequence()) {
1345 [
this](
const MachineOperand &UseOp) {
1346 return !UseOp.isReg() || UseOp.getReg().isVirtual() ||
1347 MRI->isConstantPhysReg(UseOp.getReg());
1349 IsLoopInvariantInst(
MI, CurLoop) &&
1350 any_of(
MRI->use_nodbg_instructions(DefReg),
1351 [&CurLoop,
this, DefReg,
1353 if (!CurLoop->contains(&UseMI))
1360 if (CanCauseHighRegPressure(Cost, false) &&
1361 !CurLoop->isLoopInvariant(UseMI, DefReg))
1371 if (!
TII->isTriviallyReMaterializable(
MI) &&
1372 !
MI.isDereferenceableInvariantLoad()) {
1383MachineInstr *MachineLICMImpl::ExtractHoistableLoad(MachineInstr *
MI,
1384 MachineLoop *CurLoop) {
1386 if (
MI->canFoldAsLoad())
1392 if (!
MI->isDereferenceableInvariantLoad())
1396 unsigned LoadRegIndex;
1398 TII->getOpcodeAfterMemoryUnfold(
MI->getOpcode(),
1402 if (NewOpc == 0)
return nullptr;
1403 const MCInstrDesc &MID =
TII->get(NewOpc);
1404 MachineFunction &MF = *
MI->getMF();
1405 const TargetRegisterClass *RC =
TII->getRegClass(MID, LoadRegIndex);
1409 SmallVector<MachineInstr *, 2> NewMIs;
1415 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1418 "Unfolded a load into multiple instructions!");
1419 MachineBasicBlock *
MBB =
MI->getParent();
1425 if (!IsLoopInvariantInst(*NewMIs[0], CurLoop) ||
1426 !IsProfitableToHoist(*NewMIs[0], CurLoop)) {
1427 NewMIs[0]->eraseFromParent();
1428 NewMIs[1]->eraseFromParent();
1433 UpdateRegPressure(NewMIs[1]);
1438 if (
MI->shouldUpdateAdditionalCallInfo())
1441 MI->eraseFromParent();
1448void MachineLICMImpl::InitCSEMap(MachineBasicBlock *BB) {
1449 for (MachineInstr &
MI : *BB)
1455void MachineLICMImpl::InitializeLoadsHoistableLoops() {
1461 while (!Worklist.empty()) {
1462 auto *
L = Worklist.pop_back_val();
1463 AllowedToHoistLoads[
L] =
true;
1475 for (
auto *Loop :
reverse(LoopsInPreOrder)) {
1476 for (
auto *
MBB : Loop->blocks()) {
1478 if (!AllowedToHoistLoads[Loop])
1480 for (
auto &
MI : *
MBB) {
1481 if (!
MI.isLoadFoldBarrier() && !
MI.mayStore() && !
MI.isCall() &&
1482 !(
MI.mayLoad() &&
MI.hasOrderedMemoryRef()))
1484 for (MachineLoop *L = Loop;
L !=
nullptr;
L =
L->getParentLoop())
1485 AllowedToHoistLoads[
L] =
false;
1495MachineLICMImpl::LookForDuplicate(
const MachineInstr *
MI,
1496 std::vector<MachineInstr *> &PrevMIs) {
1497 for (MachineInstr *PrevMI : PrevMIs)
1498 if (
TII->produceSameValue(*
MI, *PrevMI, (PreRegAlloc ?
MRI :
nullptr)))
1508bool MachineLICMImpl::EliminateCSE(
1510 DenseMap<
unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1513 if (
MI->isImplicitDef())
1518 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1521 if (MachineInstr *Dup = LookForDuplicate(
MI, CI->second)) {
1526 SmallVector<unsigned, 2> Defs;
1527 for (
unsigned i = 0, e =
MI->getNumOperands(); i != e; ++i) {
1528 const MachineOperand &MO =
MI->getOperand(i);
1532 MO.
getReg() == Dup->getOperand(i).getReg()) &&
1533 "Instructions with different phys regs are not identical!");
1540 for (
unsigned i = 0, e = Defs.
size(); i != e; ++i) {
1541 unsigned Idx = Defs[i];
1543 Register DupReg = Dup->getOperand(Idx).getReg();
1546 if (!
MRI->constrainRegClass(DupReg,
MRI->getRegClass(
Reg))) {
1548 for (
unsigned j = 0;
j != i; ++
j)
1549 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1554 for (
unsigned Idx : Defs) {
1556 Register DupReg = Dup->getOperand(Idx).getReg();
1557 MRI->replaceRegWith(
Reg, DupReg);
1558 MRI->clearKillFlags(DupReg);
1560 if (!
MRI->use_nodbg_empty(DupReg))
1561 Dup->getOperand(Idx).setIsDead(
false);
1564 MI->eraseFromParent();
1573bool MachineLICMImpl::MayCSE(MachineInstr *
MI) {
1574 if (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad())
1577 unsigned Opcode =
MI->getOpcode();
1578 for (
auto &Map : CSEMap) {
1581 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1582 Map.second.find(Opcode);
1585 if (CI ==
Map.second.end() ||
MI->isImplicitDef())
1587 if (LookForDuplicate(
MI, CI->second) !=
nullptr)
1598unsigned MachineLICMImpl::Hoist(MachineInstr *
MI, MachineBasicBlock *Preheader,
1599 MachineLoop *CurLoop) {
1600 MachineBasicBlock *SrcBlock =
MI->getParent();
1605 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1606 ++NumNotHoistedDueToHotness;
1607 return HoistResult::NotHoisted;
1610 bool HasExtractHoistableLoad =
false;
1611 if (!IsLoopInvariantInst(*
MI, CurLoop) ||
1612 !IsProfitableToHoist(*
MI, CurLoop)) {
1614 MI = ExtractHoistableLoad(
MI, CurLoop);
1616 return HoistResult::NotHoisted;
1617 HasExtractHoistableLoad =
true;
1628 dbgs() <<
"Hoisting " << *
MI;
1629 if (
MI->getParent()->getBasicBlock())
1639 InitCSEMap(Preheader);
1640 FirstInLoop =
false;
1644 unsigned Opcode =
MI->getOpcode();
1645 bool HasCSEDone =
false;
1646 for (
auto &Map : CSEMap) {
1649 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1650 Map.second.find(Opcode);
1651 if (CI !=
Map.second.end()) {
1652 if (EliminateCSE(
MI, CI)) {
1667 assert(!
MI->isDebugInstr() &&
"Should not hoist debug inst");
1671 UpdateBackTraceRegPressure(
MI);
1676 for (MachineOperand &MO :
MI->all_defs())
1680 CSEMap[Preheader][Opcode].push_back(
MI);
1686 if (HasCSEDone || HasExtractHoistableLoad)
1687 return HoistResult::Hoisted | HoistResult::ErasedMI;
1688 return HoistResult::Hoisted;
1692MachineBasicBlock *MachineLICMImpl::getOrCreatePreheader(MachineLoop *CurLoop) {
1701 MachineBasicBlock *NewPreheader = Pred->SplitCriticalEdge(
1702 CurLoop->
getHeader(), LegacyPass, MFAM,
nullptr, MDTU);
1705 return NewPreheader;
1713bool MachineLICMImpl::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1714 MachineBasicBlock *TgtBlock) {
1723 double Ratio = (double)DstBF / SrcBF;
1729template <
typename DerivedT,
bool PreRegAlloc>
1732 bool Changed = MachineLICMImpl(PreRegAlloc,
nullptr, &MFAM).run(MF);