1//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion on machine instructions. We
10// attempt to remove as much code from the body of a loop as possible.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
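//
// A small illustrative sketch (not part of the original file): if a loop body
// rematerializes the same loop-invariant value on every iteration, e.g.
//
//   loop:
//     %inv = MOV32ri 42        ; loop-invariant definition (x86-like opcode,
//     ... use %inv ...         ;  shown only as an example)
//     JMP loop
//
// this pass moves that definition into the loop preheader so it executes once:
//
//   preheader:
//     %inv = MOV32ri 42
//   loop:
//     ... use %inv ...
//     JMP loop
//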
17
18#include "llvm/ADT/BitVector.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/MCInstrDesc.h"
45#include "llvm/MC/MCRegister.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
52#include <algorithm>
53#include <cassert>
54#include <limits>
55#include <vector>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "machinelicm"
60
61static cl::opt<bool>
62AvoidSpeculation("avoid-speculation",
63 cl::desc("MachineLICM should avoid speculation"),
64 cl::init(true), cl::Hidden);
65
66static cl::opt<bool>
67HoistCheapInsts("hoist-cheap-insts",
68 cl::desc("MachineLICM should hoist even cheap instructions"),
69 cl::init(false), cl::Hidden);
70
71static cl::opt<bool>
72HoistConstStores("hoist-const-stores",
73 cl::desc("Hoist invariant stores"),
74 cl::init(true), cl::Hidden);
75// The default threshold of 100 (i.e. if target block is 100 times hotter)
76// is based on empirical data on a single target and is subject to tuning.
77static cl::opt<unsigned>
78BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
79 cl::desc("Do not hoist instructions if target "
80 "block is N times hotter than the source."),
81 cl::init(100), cl::Hidden);
82
83enum class UseBFI { None, PGO, All };
84
85static cl::opt<UseBFI>
86DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
87 cl::desc("Disable hoisting instructions to"
88 " hotter blocks"),
89 cl::init(UseBFI::PGO), cl::Hidden,
90 cl::values(clEnumValN(UseBFI::None, "none",
91 "disable the feature"),
92 clEnumValN(UseBFI::PGO, "pgo",
93 "enable the feature when using profile data"),
94 clEnumValN(UseBFI::All, "all",
95 "enable the feature with/wo profile data")));
96
97STATISTIC(NumHoisted,
98 "Number of machine instructions hoisted out of loops");
99STATISTIC(NumLowRP,
100 "Number of instructions hoisted in low reg pressure situation");
101STATISTIC(NumHighLatency,
102 "Number of high latency instructions hoisted");
103STATISTIC(NumCSEed,
104 "Number of hoisted machine instructions CSEed");
105STATISTIC(NumPostRAHoisted,
106 "Number of machine instructions hoisted out of loops post regalloc");
107STATISTIC(NumStoreConst,
108 "Number of stores of const phys reg hoisted out of loops");
109STATISTIC(NumNotHoistedDueToHotness,
110 "Number of instructions not hoisted due to block frequency");
111
112namespace {
113
114 class MachineLICMBase : public MachineFunctionPass {
115 const TargetInstrInfo *TII = nullptr;
116 const TargetLoweringBase *TLI = nullptr;
117 const TargetRegisterInfo *TRI = nullptr;
118 const MachineFrameInfo *MFI = nullptr;
119 MachineRegisterInfo *MRI = nullptr;
120 TargetSchedModel SchedModel;
121 bool PreRegAlloc = false;
122 bool HasProfileData = false;
123
124 // Various analyses that we use...
125 AliasAnalysis *AA = nullptr; // Alias analysis info.
126 MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
127 MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
128 MachineDominatorTree *DT = nullptr; // Machine dominator tree for the cur loop
129
130 // State that is updated as we process loops
131 bool Changed = false; // True if a loop is changed.
132 bool FirstInLoop = false; // True if it's the first LICM in the loop.
133 MachineLoop *CurLoop = nullptr; // The current loop we are working on.
134 MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
135
136 // Exit blocks for CurLoop.
137 SmallVector<MachineBasicBlock *, 8> ExitBlocks;
138
139 bool isExitBlock(const MachineBasicBlock *MBB) const {
140 return is_contained(ExitBlocks, MBB);
141 }
142
143 // Track 'estimated' register pressure.
144 SmallSet<Register, 32> RegSeen;
145 SmallVector<unsigned, 8> RegPressure;
146
147 // Register pressure "limit" per register pressure set. If the pressure
148 // is higher than the limit, then it's considered high.
149 SmallVector<unsigned, 8> RegLimit;
150
151 // Register pressure on path leading from loop preheader to current BB.
152 SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
153
154 // For each opcode, keep a list of potential CSE instructions.
155 DenseMap<unsigned, std::vector<MachineInstr *>> CSEMap;
156
157 enum {
158 SpeculateFalse = 0,
159 SpeculateTrue = 1,
160 SpeculateUnknown = 2
161 };
162
163 // If a MBB does not dominate loop exiting blocks, then it may not be safe
164 // to hoist loads from this block.
165 // Tri-state: 0 - false, 1 - true, 2 - unknown
166 unsigned SpeculationState = SpeculateUnknown;
167
168 public:
169 MachineLICMBase(char &PassID, bool PreRegAlloc)
170 : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
171
172 bool runOnMachineFunction(MachineFunction &MF) override;
173
174 void getAnalysisUsage(AnalysisUsage &AU) const override {
175 AU.addRequired<MachineLoopInfo>();
176 if (DisableHoistingToHotterBlocks != UseBFI::None)
177 AU.addRequired<MachineBlockFrequencyInfo>();
178 AU.addRequired<MachineDominatorTree>();
179 AU.addRequired<AAResultsWrapperPass>();
180 AU.addPreserved<MachineLoopInfo>();
181 MachineFunctionPass::getAnalysisUsage(AU);
182 }
183
184 void releaseMemory() override {
185 RegSeen.clear();
186 RegPressure.clear();
187 RegLimit.clear();
188 BackTrace.clear();
189 CSEMap.clear();
190 }
191
192 private:
193 /// Keep track of information about hoisting candidates.
194 struct CandidateInfo {
195 MachineInstr *MI;
196 unsigned Def;
197 int FI;
198
199 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
200 : MI(mi), Def(def), FI(fi) {}
201 };
202
203 void HoistRegionPostRA();
204
205 void HoistPostRA(MachineInstr *MI, unsigned Def);
206
207 void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
208 BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
209 SmallVectorImpl<CandidateInfo> &Candidates);
210
211 void AddToLiveIns(MCRegister Reg);
212
213 bool IsLICMCandidate(MachineInstr &I);
214
215 bool IsLoopInvariantInst(MachineInstr &I);
216
217 bool HasLoopPHIUse(const MachineInstr *MI) const;
218
219 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
220 Register Reg) const;
221
222 bool IsCheapInstruction(MachineInstr &MI) const;
223
224 bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
225 bool Cheap);
226
227 void UpdateBackTraceRegPressure(const MachineInstr *MI);
228
229 bool IsProfitableToHoist(MachineInstr &MI);
230
231 bool IsGuaranteedToExecute(MachineBasicBlock *BB);
232
233 bool isTriviallyReMaterializable(const MachineInstr &MI) const;
234
235 void EnterScope(MachineBasicBlock *MBB);
236
237 void ExitScope(MachineBasicBlock *MBB);
238
239 void ExitScopeIfDone(
240 MachineDomTreeNode *Node,
241 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
242 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);
243
244 void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
245
246 void InitRegPressure(MachineBasicBlock *BB);
247
248 DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
249 bool ConsiderSeen,
250 bool ConsiderUnseenAsDef);
251
252 void UpdateRegPressure(const MachineInstr *MI,
253 bool ConsiderUnseenAsDef = false);
254
255 MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
256
257 MachineInstr *LookForDuplicate(const MachineInstr *MI,
258 std::vector<MachineInstr *> &PrevMIs);
259
260 bool
261 EliminateCSE(MachineInstr *MI,
262 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
263
264 bool MayCSE(MachineInstr *MI);
265
266 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
267
268 void InitCSEMap(MachineBasicBlock *BB);
269
270 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
271 MachineBasicBlock *TgtBlock);
272 MachineBasicBlock *getCurPreheader();
273 };
274
275 class MachineLICM : public MachineLICMBase {
276 public:
277 static char ID;
278 MachineLICM() : MachineLICMBase(ID, false) {
279 initializeMachineLICMPass(*PassRegistry::getPassRegistry());
280 }
281 };
282
283 class EarlyMachineLICM : public MachineLICMBase {
284 public:
285 static char ID;
286 EarlyMachineLICM() : MachineLICMBase(ID, true) {
287 initializeEarlyMachineLICMPass(*PassRegistry::getPassRegistry());
288 }
289 };
290
291} // end anonymous namespace
292
293char MachineLICM::ID;
294char EarlyMachineLICM::ID;
295
296char &llvm::MachineLICMID = MachineLICM::ID;
297char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
298
300 "Machine Loop Invariant Code Motion", false, false)
306 "Machine Loop Invariant Code Motion", false, false)
307
308INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
309 "Early Machine Loop Invariant Code Motion", false, false)
314INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
315 "Early Machine Loop Invariant Code Motion", false, false)
316
317/// Test if the given loop is the outer-most loop that has a unique predecessor.
318static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
319 // Check whether this loop even has a unique predecessor.
320 if (!CurLoop->getLoopPredecessor())
321 return false;
322 // Ok, now check to see if any of its outer loops do.
323 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
324 if (L->getLoopPredecessor())
325 return false;
326 // None of them did, so this is the outermost with a unique predecessor.
327 return true;
328}
329
330bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
331 if (skipFunction(MF.getFunction()))
332 return false;
333
334 Changed = FirstInLoop = false;
335 const TargetSubtargetInfo &ST = MF.getSubtarget();
336 TII = ST.getInstrInfo();
337 TLI = ST.getTargetLowering();
338 TRI = ST.getRegisterInfo();
339 MFI = &MF.getFrameInfo();
340 MRI = &MF.getRegInfo();
341 SchedModel.init(&ST);
342
343 PreRegAlloc = MRI->isSSA();
344 HasProfileData = MF.getFunction().hasProfileData();
345
346 if (PreRegAlloc)
347 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
348 else
349 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
350 LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
351
352 if (PreRegAlloc) {
353 // Estimate register pressure during pre-regalloc pass.
354 unsigned NumRPS = TRI->getNumRegPressureSets();
355 RegPressure.resize(NumRPS);
356 std::fill(RegPressure.begin(), RegPressure.end(), 0);
357 RegLimit.resize(NumRPS);
358 for (unsigned i = 0, e = NumRPS; i != e; ++i)
359 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
360 }
361
362 // Get our Loop information...
363 if (DisableHoistingToHotterBlocks != UseBFI::None)
364 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
365 MLI = &getAnalysis<MachineLoopInfo>();
366 DT = &getAnalysis<MachineDominatorTree>();
367 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
368
369 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
370 while (!Worklist.empty()) {
371 CurLoop = Worklist.pop_back_val();
372 CurPreheader = nullptr;
373 ExitBlocks.clear();
374
375 // If this is done before regalloc, only visit outer-most preheader-sporting
376 // loops.
377 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
378 Worklist.append(CurLoop->begin(), CurLoop->end());
379 continue;
380 }
381
382 CurLoop->getExitBlocks(ExitBlocks);
383
384 if (!PreRegAlloc)
385 HoistRegionPostRA();
386 else {
387 // CSEMap is initialized for loop header when the first instruction is
388 // being hoisted.
389 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
390 FirstInLoop = true;
391 HoistOutOfLoop(N);
392 CSEMap.clear();
393 }
394 }
395
396 return Changed;
397}
398
399/// Return true if instruction stores to the specified frame.
400static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
401 // Check mayStore before memory operands so that e.g. DBG_VALUEs (which have
402 // no memory operands) do not conservatively return true below.
403 if (!MI->mayStore())
404 return false;
405 // If we lost memory operands, conservatively assume that the instruction
406 // writes to all slots.
407 if (MI->memoperands_empty())
408 return true;
409 for (const MachineMemOperand *MemOp : MI->memoperands()) {
410 if (!MemOp->isStore() || !MemOp->getPseudoValue())
411 continue;
412 if (const FixedStackPseudoSourceValue *Value =
413 dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
414 if (Value->getFrameIndex() == FI)
415 return true;
416 }
417 }
418 return false;
419}
420
421/// Examine the instruction as a potential LICM candidate. Also
422/// gather register def and frame object update information.
423void MachineLICMBase::ProcessMI(MachineInstr *MI,
424 BitVector &PhysRegDefs,
425 BitVector &PhysRegClobbers,
426 SmallSet<int, 32> &StoredFIs,
427 SmallVectorImpl<CandidateInfo> &Candidates) {
428 bool RuledOut = false;
429 bool HasNonInvariantUse = false;
430 unsigned Def = 0;
431 for (const MachineOperand &MO : MI->operands()) {
432 if (MO.isFI()) {
433 // Remember if the instruction stores to the frame index.
434 int FI = MO.getIndex();
435 if (!StoredFIs.count(FI) &&
436 MFI->isSpillSlotObjectIndex(FI) &&
437 InstructionStoresToFI(MI, FI))
438 StoredFIs.insert(FI);
439 HasNonInvariantUse = true;
440 continue;
441 }
442
443 // We can't hoist an instruction defining a physreg that is clobbered in
444 // the loop.
445 if (MO.isRegMask()) {
446 PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
447 continue;
448 }
449
450 if (!MO.isReg())
451 continue;
452 Register Reg = MO.getReg();
453 if (!Reg)
454 continue;
455 assert(Reg.isPhysical() && "Not expecting virtual register!");
456
457 if (!MO.isDef()) {
458 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
459 // If it's using a non-loop-invariant register, then it's obviously not
460 // safe to hoist.
461 HasNonInvariantUse = true;
462 continue;
463 }
464
465 if (MO.isImplicit()) {
466 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
467 PhysRegClobbers.set(*AI);
468 if (!MO.isDead())
469 // Non-dead implicit def? This cannot be hoisted.
470 RuledOut = true;
471 // No need to check if a dead implicit def is also defined by
472 // another instruction.
473 continue;
474 }
475
476 // FIXME: For now, avoid instructions with multiple defs, unless
477 // it's a dead implicit def.
478 if (Def)
479 RuledOut = true;
480 else
481 Def = Reg;
482
483 // If we have already seen another instruction that defines the same
484 // register, then this is not safe. Two defs are indicated by setting a
485 // PhysRegClobbers bit.
486 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
487 if (PhysRegDefs.test(*AS))
488 PhysRegClobbers.set(*AS);
489 }
490 // Need a second loop because MCRegAliasIterator can visit the same
491 // register twice.
492 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
493 PhysRegDefs.set(*AS);
494
495 if (PhysRegClobbers.test(Reg))
496 // The register defined by MI is also defined by another instruction in
497 // the loop, so it cannot be a LICM candidate.
498 RuledOut = true;
499 }
500
501 // Only consider reloads for now and remats which do not have register
502 // operands. FIXME: Consider unfold load folding instructions.
503 if (Def && !RuledOut) {
504 int FI = std::numeric_limits<int>::min();
505 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
506 (TII->isLoadFromStackSlot(*MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
507 Candidates.push_back(CandidateInfo(MI, Def, FI));
508 }
509}
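//
// Summary of the bookkeeping above: a register defined exactly once in the
// loop only has its PhysRegDefs bits set; a second definition, a regmask, or a
// non-dead implicit def also sets the PhysRegClobbers bits, and candidates
// that define or use a clobbered register are rejected later in
// HoistRegionPostRA.
//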
510
511/// Walk the specified region of the CFG and hoist loop invariants out to the
512/// preheader.
513void MachineLICMBase::HoistRegionPostRA() {
514 MachineBasicBlock *Preheader = getCurPreheader();
515 if (!Preheader)
516 return;
517
518 unsigned NumRegs = TRI->getNumRegs();
519 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
520 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
521
522 SmallVector<CandidateInfo, 32> Candidates;
523 SmallSet<int, 32> StoredFIs;
524
525 // Walk the entire region, count number of defs for each register, and
526 // collect potential LICM candidates.
527 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
528 // If the header of the loop containing this basic block is a landing pad,
529 // then don't try to hoist instructions out of this loop.
530 const MachineLoop *ML = MLI->getLoopFor(BB);
531 if (ML && ML->getHeader()->isEHPad()) continue;
532
533 // Conservatively treat live-ins as external defs.
534 // FIXME: That means a reload that is reused in successor block(s) will not
535 // be LICM'ed.
536 for (const auto &LI : BB->liveins()) {
537 for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
538 PhysRegDefs.set(*AI);
539 }
540
541 // Funclet entry blocks will clobber all registers
542 if (const uint32_t *Mask = BB->getBeginClobberMask(TRI))
543 PhysRegClobbers.setBitsNotInMask(Mask);
544
545 SpeculationState = SpeculateUnknown;
546 for (MachineInstr &MI : *BB)
547 ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
548 }
549
550 // Gather the registers read / clobbered by the terminator.
551 BitVector TermRegs(NumRegs);
552 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
553 if (TI != Preheader->end()) {
554 for (const MachineOperand &MO : TI->operands()) {
555 if (!MO.isReg())
556 continue;
557 Register Reg = MO.getReg();
558 if (!Reg)
559 continue;
560 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
561 TermRegs.set(*AI);
562 }
563 }
564
565 // Now evaluate whether the potential candidates qualify.
566 // 1. Check if the candidate defined register is defined by another
567 // instruction in the loop.
568 // 2. If the candidate is a load from stack slot (always true for now),
569 // check if the slot is stored anywhere in the loop.
570 // 3. Make sure the candidate's def does not clobber registers read by
571 // the terminator, and that it is not itself clobbered by the
572 // terminator.
573 for (CandidateInfo &Candidate : Candidates) {
574 if (Candidate.FI != std::numeric_limits<int>::min() &&
575 StoredFIs.count(Candidate.FI))
576 continue;
577
578 unsigned Def = Candidate.Def;
579 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
580 bool Safe = true;
581 MachineInstr *MI = Candidate.MI;
582 for (const MachineOperand &MO : MI->all_uses()) {
583 if (!MO.getReg())
584 continue;
585 Register Reg = MO.getReg();
586 if (PhysRegDefs.test(Reg) ||
587 PhysRegClobbers.test(Reg)) {
588 // If it's using a non-loop-invariant register, then it's obviously
589 // not safe to hoist.
590 Safe = false;
591 break;
592 }
593 }
594 if (Safe)
595 HoistPostRA(MI, Candidate.Def);
596 }
597 }
598}
599
600/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
601/// sure it is not killed by any instructions in the loop.
602void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
603 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
604 if (!BB->isLiveIn(Reg))
605 BB->addLiveIn(Reg);
606 for (MachineInstr &MI : *BB) {
607 for (MachineOperand &MO : MI.all_uses()) {
608 if (!MO.getReg())
609 continue;
610 if (TRI->isSuperRegisterEq(Reg, MO.getReg()))
611 MO.setIsKill(false);
612 }
613 }
614 }
615}
616
617/// When an instruction is found to use only loop invariant operands that are
618/// safe to hoist, this function is called to do the dirty work.
619void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
620 MachineBasicBlock *Preheader = getCurPreheader();
621
622 // Now move the instruction to the predecessor, inserting it before any
623 // terminator instructions.
624 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
625 << " from " << printMBBReference(*MI->getParent()) << ": "
626 << *MI);
627
628 // Splice the instruction to the preheader.
629 MachineBasicBlock *MBB = MI->getParent();
630 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
631
632 // Since we are moving the instruction out of its basic block, we do not
633 // retain its debug location. Doing so would degrade the debugging
634 // experience and adversely affect the accuracy of profiling information.
635 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
636 MI->setDebugLoc(DebugLoc());
637
638 // Add register to livein list to all the BBs in the current loop since a
639 // loop invariant must be kept live throughout the whole loop. This is
640 // important to ensure later passes do not scavenge the def register.
641 AddToLiveIns(Def);
642
643 ++NumPostRAHoisted;
644 Changed = true;
645}
646
647/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
648/// may not be safe to hoist.
649bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
650 if (SpeculationState != SpeculateUnknown)
651 return SpeculationState == SpeculateFalse;
652
653 if (BB != CurLoop->getHeader()) {
654 // Check loop exiting blocks.
655 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
656 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
657 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
658 if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
659 SpeculationState = SpeculateTrue;
660 return false;
661 }
662 }
663
664 SpeculationState = SpeculateFalse;
665 return true;
666}
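//
// Illustrative case: in a loop shaped like
//
//   header -> guarded -> latch (exiting)
//      \________________/
//
// the 'guarded' block does not dominate the exiting latch, so an instruction
// placed there is not guaranteed to execute on every iteration and a load from
// it will not be speculatively hoisted.
//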
667
668/// Check if \p MI is trivially rematerializable and has no virtual register
669/// uses. Even if it is rematerializable, RA might not actually rematerialize
670/// it in this scenario; in that case we do not want to hoist the instruction
671/// out of the loop in the belief that RA will sink it back if needed.
672bool MachineLICMBase::isTriviallyReMaterializable(
673 const MachineInstr &MI) const {
674 if (!TII->isTriviallyReMaterializable(MI))
675 return false;
676
677 for (const MachineOperand &MO : MI.all_uses()) {
678 if (MO.getReg().isVirtual())
679 return false;
680 }
681
682 return true;
683}
684
685void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
686 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
687
688 // Remember livein register pressure.
689 BackTrace.push_back(RegPressure);
690}
691
692void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
693 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
694 BackTrace.pop_back();
695}
696
697/// Destroy scope for the MBB that corresponds to the given dominator tree
698/// node if it's a leaf or all of its children are done. Walk up the dominator
699/// tree to destroy ancestors which are now done.
700void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
701 DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
702 const DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) {
703 if (OpenChildren[Node])
704 return;
705
706 for(;;) {
707 ExitScope(Node->getBlock());
708 // Now traverse upwards to pop ancestors whose offspring are all done.
709 MachineDomTreeNode *Parent = ParentMap.lookup(Node);
710 if (!Parent || --OpenChildren[Parent] != 0)
711 break;
712 Node = Parent;
713 }
714}
715
716/// Walk the specified loop in the CFG (defined by all blocks dominated by the
717/// specified header block, and that are in the current loop) in depth first
718/// order w.r.t the DominatorTree. This allows us to visit definitions before
719/// uses, allowing us to hoist a loop body in one pass without iteration.
720void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
721 MachineBasicBlock *Preheader = getCurPreheader();
722 if (!Preheader)
723 return;
724
725 SmallVector<MachineDomTreeNode *, 32> Scopes;
726 SmallVector<MachineDomTreeNode *, 8> WorkList;
727 DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> ParentMap;
728 DenseMap<MachineDomTreeNode *, unsigned> OpenChildren;
729
730 // Perform a DFS walk to determine the order of visit.
731 WorkList.push_back(HeaderN);
732 while (!WorkList.empty()) {
733 MachineDomTreeNode *Node = WorkList.pop_back_val();
734 assert(Node && "Null dominator tree node?");
735 MachineBasicBlock *BB = Node->getBlock();
736
737 // If the header of the loop containing this basic block is a landing pad,
738 // then don't try to hoist instructions out of this loop.
739 const MachineLoop *ML = MLI->getLoopFor(BB);
740 if (ML && ML->getHeader()->isEHPad())
741 continue;
742
743 // If this subregion is not in the top level loop at all, exit.
744 if (!CurLoop->contains(BB))
745 continue;
746
747 Scopes.push_back(Node);
748 unsigned NumChildren = Node->getNumChildren();
749
750 // Don't hoist things out of a large switch statement. This often causes
751 // code to be hoisted that wasn't going to be executed, and increases
752 // register pressure in a situation where it's likely to matter.
753 if (BB->succ_size() >= 25)
754 NumChildren = 0;
755
756 OpenChildren[Node] = NumChildren;
757 if (NumChildren) {
758 // Add children in reverse order as then the next popped worklist node is
759 // the first child of this node. This means we ultimately traverse the
760 // DOM tree in exactly the same order as if we'd recursed.
761 for (MachineDomTreeNode *Child : reverse(Node->children())) {
762 ParentMap[Child] = Node;
763 WorkList.push_back(Child);
764 }
765 }
766 }
767
768 if (Scopes.size() == 0)
769 return;
770
771 // Compute registers which are livein into the loop headers.
772 RegSeen.clear();
773 BackTrace.clear();
774 InitRegPressure(Preheader);
775
776 // Now perform LICM.
777 for (MachineDomTreeNode *Node : Scopes) {
778 MachineBasicBlock *MBB = Node->getBlock();
779
780 EnterScope(MBB);
781
782 // Process the block
783 SpeculationState = SpeculateUnknown;
784 for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
785 if (!Hoist(&MI, Preheader))
786 UpdateRegPressure(&MI);
787 // If we have hoisted an instruction that may store, it can only be a
788 // constant store.
789 }
790
791 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
792 ExitScopeIfDone(Node, OpenChildren, ParentMap);
793 }
794}
795
796static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
797 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
798}
799
800/// Find all virtual register references that are liveout of the preheader to
801/// initialize the starting "register pressure". Note this does not count live
802/// through (livein but not used) registers.
803void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
804 std::fill(RegPressure.begin(), RegPressure.end(), 0);
805
806 // If the preheader has only a single predecessor and it ends with a
807 // fallthrough or an unconditional branch, then scan its predecessor for live
808 // defs as well. This happens whenever the preheader is created by splitting
809 // the critical edge from the loop predecessor to the loop header.
810 if (BB->pred_size() == 1) {
811 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
812 SmallVector<MachineOperand, 4> Cond;
813 if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
814 InitRegPressure(*BB->pred_begin());
815 }
816
817 for (const MachineInstr &MI : *BB)
818 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
819}
820
821/// Update estimate of register pressure after the specified instruction.
822void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
823 bool ConsiderUnseenAsDef) {
824 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
825 for (const auto &RPIdAndCost : Cost) {
826 unsigned Class = RPIdAndCost.first;
827 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
828 RegPressure[Class] = 0;
829 else
830 RegPressure[Class] += RPIdAndCost.second;
831 }
832}
833
834/// Calculate the additional register pressure that the registers used in MI
835/// cause.
836///
837/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
838/// figure out which usages are live-ins.
839/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
840DenseMap<unsigned, int>
841MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
842 bool ConsiderUnseenAsDef) {
843 DenseMap<unsigned, int> Cost;
844 if (MI->isImplicitDef())
845 return Cost;
846 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
847 const MachineOperand &MO = MI->getOperand(i);
848 if (!MO.isReg() || MO.isImplicit())
849 continue;
850 Register Reg = MO.getReg();
851 if (!Reg.isVirtual())
852 continue;
853
854 // FIXME: It seems bad to use RegSeen only for some of these calculations.
855 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
856 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
857
858 RegClassWeight W = TRI->getRegClassWeight(RC);
859 int RCCost = 0;
860 if (MO.isDef())
861 RCCost = W.RegWeight;
862 else {
863 bool isKill = isOperandKill(MO, MRI);
864 if (isNew && !isKill && ConsiderUnseenAsDef)
865 // Haven't seen this, it must be a livein.
866 RCCost = W.RegWeight;
867 else if (!isNew && isKill)
868 RCCost = -W.RegWeight;
869 }
870 if (RCCost == 0)
871 continue;
872 const int *PS = TRI->getRegClassPressureSets(RC);
873 for (; *PS != -1; ++PS) {
874 if (!Cost.contains(*PS))
875 Cost[*PS] = RCCost;
876 else
877 Cost[*PS] += RCCost;
878 }
879 }
880 return Cost;
881}
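//
// Worked example (illustrative): an instruction that defines one new virtual
// register of class weight 1 and kills one previously-seen register of the
// same class contributes +1 for the def and -1 for the killed use to every
// pressure set of that class, so the net entry in the returned map is 0 and
// UpdateRegPressure leaves the running estimate unchanged.
//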
882
883/// Return true if this machine instruction loads from global offset table or
884/// constant pool.
885static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
886 assert(MI.mayLoad() && "Expected MI that loads!");
887
888 // If we lost memory operands, conservatively assume that the instruction
889 // reads from everything.
890 if (MI.memoperands_empty())
891 return true;
892
893 for (MachineMemOperand *MemOp : MI.memoperands())
894 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
895 if (PSV->isGOT() || PSV->isConstantPool())
896 return true;
897
898 return false;
899}
900
901// This function iterates through all the operands of the input store MI and
902// checks that each register operand satisfies isCallerPreservedPhysReg.
903// This means the value being stored and the address where it is being stored
904// are constant throughout the body of the function (not including prologue and
905// epilogue). When called with an MI that isn't a store, it returns false.
906// A future improvement can be to check if the store registers are constant
907// throughout the loop rather than throughout the function.
908static bool isInvariantStore(const MachineInstr &MI,
909 const TargetRegisterInfo *TRI,
910 const MachineRegisterInfo *MRI) {
911
912 bool FoundCallerPresReg = false;
913 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
914 (MI.getNumOperands() == 0))
915 return false;
916
917 // Check that all register operands are caller-preserved physical registers.
918 for (const MachineOperand &MO : MI.operands()) {
919 if (MO.isReg()) {
920 Register Reg = MO.getReg();
921 // If operand is a virtual register, check if it comes from a copy of a
922 // physical register.
923 if (Reg.isVirtual())
924 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
925 if (Reg.isVirtual())
926 return false;
927 if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
928 return false;
929 else
930 FoundCallerPresReg = true;
931 } else if (!MO.isImm()) {
932 return false;
933 }
934 }
935 return FoundCallerPresReg;
936}
937
938// Return true if the input MI is a copy instruction that feeds an invariant
939// store instruction. This means that the src of the copy has to satisfy
940// isCallerPreservedPhysReg and at least one of its users should satisfy
941// isInvariantStore.
942static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
943 const MachineRegisterInfo *MRI,
944 const TargetRegisterInfo *TRI) {
945
946 // FIXME: If targets would like to look through instructions that aren't
947 // pure copies, this can be updated to a query.
948 if (!MI.isCopy())
949 return false;
950
951 const MachineFunction *MF = MI.getMF();
952 // Check that we are copying a constant physical register.
953 Register CopySrcReg = MI.getOperand(1).getReg();
954 if (CopySrcReg.isVirtual())
955 return false;
956
957 if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
958 return false;
959
960 Register CopyDstReg = MI.getOperand(0).getReg();
961 // Check if any of the uses of the copy are invariant stores.
962 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
963
964 for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
965 if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
966 return true;
967 }
968 return false;
969}
970
971/// Returns true if the instruction may be a suitable candidate for LICM.
972/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
973bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
974 // Check if it's safe to move the instruction.
975 bool DontMoveAcrossStore = true;
976 if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
977 !(HoistConstStores && isInvariantStore(I, TRI, MRI))) {
978 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
979 return false;
980 }
981
982 // If it is a load then check if it is guaranteed to execute by making sure
983 // that it dominates all exiting blocks. If it doesn't, then there is a path
984 // out of the loop which does not execute this load, so we can't hoist it.
985 // Loads from constant memory are safe to speculate, for example indexed load
986 // from a jump table.
987 // Stores and side effects are already checked by isSafeToMove.
988 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
989 !IsGuaranteedToExecute(I.getParent())) {
990 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
991 return false;
992 }
993
994 // The convergent attribute is used on operations that involve inter-thread
995 // communication, whose results are implicitly affected by the enclosing
996 // control flow. It is not safe to hoist or sink such operations across
997 // control flow.
998 if (I.isConvergent())
999 return false;
1000
1001 if (!TII->shouldHoist(I, CurLoop))
1002 return false;
1003
1004 return true;
1005}
1006
1007/// Returns true if the instruction is loop invariant.
1008bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
1009 if (!IsLICMCandidate(I)) {
1010 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1011 return false;
1012 }
1013 return CurLoop->isLoopInvariant(I);
1014}
1015
1016/// Return true if the specified instruction is used by a phi node and hoisting
1017/// it could cause a copy to be inserted.
1018bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
1019 SmallVector<const MachineInstr *, 8> Work(1, MI);
1020 do {
1021 MI = Work.pop_back_val();
1022 for (const MachineOperand &MO : MI->all_defs()) {
1023 Register Reg = MO.getReg();
1024 if (!Reg.isVirtual())
1025 continue;
1026 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1027 // A PHI may cause a copy to be inserted.
1028 if (UseMI.isPHI()) {
1029 // A PHI inside the loop causes a copy because the live range of Reg is
1030 // extended across the PHI.
1031 if (CurLoop->contains(&UseMI))
1032 return true;
1033 // A PHI in an exit block can cause a copy to be inserted if the PHI
1034 // has multiple predecessors in the loop with different values.
1035 // For now, approximate by rejecting all exit blocks.
1036 if (isExitBlock(UseMI.getParent()))
1037 return true;
1038 continue;
1039 }
1040 // Look past copies as well.
1041 if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1042 Work.push_back(&UseMI);
1043 }
1044 }
1045 } while (!Work.empty());
1046 return false;
1047}
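//
// For example: if a hoisted value feeds a PHI in the loop, its live range is
// extended across the PHI and PHI elimination will have to insert a copy for
// it inside the loop, so hoisting the defining instruction may trade one
// loop-resident instruction for another plus extra register pressure.
//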
1048
1049/// Compute operand latency between a def of 'Reg' and a use in the current
1050/// loop, return true if the target considered it high.
1051bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1052 Register Reg) const {
1053 if (MRI->use_nodbg_empty(Reg))
1054 return false;
1055
1056 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1057 if (UseMI.isCopyLike())
1058 continue;
1059 if (!CurLoop->contains(UseMI.getParent()))
1060 continue;
1061 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1062 const MachineOperand &MO = UseMI.getOperand(i);
1063 if (!MO.isReg() || !MO.isUse())
1064 continue;
1065 Register MOReg = MO.getReg();
1066 if (MOReg != Reg)
1067 continue;
1068
1069 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1070 return true;
1071 }
1072
1073 // Only look at the first in loop use.
1074 break;
1075 }
1076
1077 return false;
1078}
1079
1080/// Return true if the instruction is marked "cheap" or the operand latency
1081/// between its def and a use is one or less.
1082bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
1083 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1084 return true;
1085
1086 bool isCheap = false;
1087 unsigned NumDefs = MI.getDesc().getNumDefs();
1088 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1089 MachineOperand &DefMO = MI.getOperand(i);
1090 if (!DefMO.isReg() || !DefMO.isDef())
1091 continue;
1092 --NumDefs;
1093 Register Reg = DefMO.getReg();
1094 if (Reg.isPhysical())
1095 continue;
1096
1097 if (!TII->hasLowDefLatency(SchedModel, MI, i))
1098 return false;
1099 isCheap = true;
1100 }
1101
1102 return isCheap;
1103}
1104
1105/// Visit BBs from header to current BB, check if hoisting an instruction of the
1106/// given cost matrix can cause high register pressure.
1107bool
1108MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1109 bool CheapInstr) {
1110 for (const auto &RPIdAndCost : Cost) {
1111 if (RPIdAndCost.second <= 0)
1112 continue;
1113
1114 unsigned Class = RPIdAndCost.first;
1115 int Limit = RegLimit[Class];
1116
1117 // Don't hoist cheap instructions if they would increase register pressure,
1118 // even if we're under the limit.
1119 if (CheapInstr && !HoistCheapInsts)
1120 return true;
1121
1122 for (const auto &RP : BackTrace)
1123 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1124 return true;
1125 }
1126
1127 return false;
1128}
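//
// Illustrative check with made-up numbers: if RegLimit[Class] is 14 and some
// block on the path from the header already has a tracked pressure of 13 for
// that set, a candidate whose cost adds +1 reaches the limit (13 + 1 >= 14)
// and the function reports that hoisting can cause high register pressure.
//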
1129
1130/// Traverse the back trace from header to the current block and update their
1131/// register pressures to reflect the effect of hoisting MI from the current
1132/// block to the preheader.
1133void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1134 // First compute the 'cost' of the instruction, i.e. its contribution
1135 // to register pressure.
1136 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1137 /*ConsiderUnseenAsDef=*/false);
1138
1139 // Update register pressure of blocks from loop header to current block.
1140 for (auto &RP : BackTrace)
1141 for (const auto &RPIdAndCost : Cost)
1142 RP[RPIdAndCost.first] += RPIdAndCost.second;
1143}
1144
1145/// Return true if it is potentially profitable to hoist the given loop
1146/// invariant.
1147bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
1148 if (MI.isImplicitDef())
1149 return true;
1150
1151 // Besides removing computation from the loop, hoisting an instruction has
1152 // these effects:
1153 //
1154 // - The value defined by the instruction becomes live across the entire
1155 // loop. This increases register pressure in the loop.
1156 //
1157 // - If the value is used by a PHI in the loop, a copy will be required for
1158 // lowering the PHI after extending the live range.
1159 //
1160 // - When hoisting the last use of a value in the loop, that value no longer
1161 // needs to be live in the loop. This lowers register pressure in the loop.
1162
1163 if (HoistConstStores && isCopyFeedingInvariantStore(MI, MRI, TRI))
1164 return true;
1165
1166 bool CheapInstr = IsCheapInstruction(MI);
1167 bool CreatesCopy = HasLoopPHIUse(&MI);
1168
1169 // Don't hoist a cheap instruction if it would create a copy in the loop.
1170 if (CheapInstr && CreatesCopy) {
1171 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1172 return false;
1173 }
1174
1175 // Rematerializable instructions should always be hoisted providing the
1176 // register allocator can just pull them down again when needed.
1177 if (isTriviallyReMaterializable(MI))
1178 return true;
1179
1180 // FIXME: If there are long latency loop-invariant instructions inside the
1181 // loop at this point, why didn't the optimizer's LICM hoist them?
1182 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1183 const MachineOperand &MO = MI.getOperand(i);
1184 if (!MO.isReg() || MO.isImplicit())
1185 continue;
1186 Register Reg = MO.getReg();
1187 if (!Reg.isVirtual())
1188 continue;
1189 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1190 LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
1191 ++NumHighLatency;
1192 return true;
1193 }
1194 }
1195
1196 // Estimate register pressure to determine whether to LICM the instruction.
1197 // In low register pressure situation, we can be more aggressive about
1198 // hoisting. Also, favors hoisting long latency instructions even in
1199 // moderately high pressure situation.
1200 // Cheap instructions will only be hoisted if they don't increase register
1201 // pressure at all.
1202 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1203 /*ConsiderUnseenAsDef=*/false);
1204
1205 // Visit BBs from header to current BB, if hoisting this doesn't cause
1206 // high register pressure, then it's safe to proceed.
1207 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1208 LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1209 ++NumLowRP;
1210 return true;
1211 }
1212
1213 // Don't risk increasing register pressure if it would create copies.
1214 if (CreatesCopy) {
1215 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1216 return false;
1217 }
1218
1219 // Do not "speculate" in high register pressure situation. If an
1220 // instruction is not guaranteed to be executed in the loop, it's best to be
1221 // conservative.
1222 if (AvoidSpeculation &&
1223 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1224 LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
1225 return false;
1226 }
1227
1228 // High register pressure situation, only hoist if the instruction is going
1229 // to be remat'ed.
1230 if (!isTriviallyReMaterializable(MI) &&
1231 !MI.isDereferenceableInvariantLoad()) {
1232 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1233 return false;
1234 }
1235
1236 return true;
1237}
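//
// Recap of the decision ladder above: implicit_defs and copies feeding
// invariant stores are always hoisted; cheap instructions that would create a
// loop PHI copy are not; trivially rematerializable instructions and defs with
// high operand latency are hoisted; a hoist that cannot raise register
// pressure past the limits is accepted; otherwise the hoist is rejected if it
// would create copies, if it is speculative (not guaranteed to execute and not
// CSE-able) while -avoid-speculation is set, or if the instruction is neither
// rematerializable nor a dereferenceable invariant load.
//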
1238
1239/// Unfold a load from the given MachineInstr if the load itself could be
1240/// hoisted. Return the unfolded and hoistable load, or null if the load
1241/// couldn't be unfolded or if it wouldn't be hoistable.
1242MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
1243 // Don't unfold simple loads.
1244 if (MI->canFoldAsLoad())
1245 return nullptr;
1246
1247 // If not, we may be able to unfold a load and hoist that.
1248 // First test whether the instruction is loading from an amenable
1249 // memory location.
1250 if (!MI->isDereferenceableInvariantLoad())
1251 return nullptr;
1252
1253 // Next determine the register class for a temporary register.
1254 unsigned LoadRegIndex;
1255 unsigned NewOpc =
1256 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1257 /*UnfoldLoad=*/true,
1258 /*UnfoldStore=*/false,
1259 &LoadRegIndex);
1260 if (NewOpc == 0) return nullptr;
1261 const MCInstrDesc &MID = TII->get(NewOpc);
1262 MachineFunction &MF = *MI->getMF();
1263 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1264 // Ok, we're unfolding. Create a temporary register and do the unfold.
1265 Register Reg = MRI->createVirtualRegister(RC);
1266
1267 SmallVector<MachineInstr *, 2> NewMIs;
1268 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1269 /*UnfoldLoad=*/true,
1270 /*UnfoldStore=*/false, NewMIs);
1271 (void)Success;
1272 assert(Success &&
1273 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1274 "succeeded!");
1275 assert(NewMIs.size() == 2 &&
1276 "Unfolded a load into multiple instructions!");
1277 MachineBasicBlock *MBB = MI->getParent();
1278 MachineBasicBlock::iterator Pos = MI;
1279 MBB->insert(Pos, NewMIs[0]);
1280 MBB->insert(Pos, NewMIs[1]);
1281 // If unfolding produced a load that wasn't loop-invariant or profitable to
1282 // hoist, discard the new instructions and bail.
1283 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1284 NewMIs[0]->eraseFromParent();
1285 NewMIs[1]->eraseFromParent();
1286 return nullptr;
1287 }
1288
1289 // Update register pressure for the unfolded instruction.
1290 UpdateRegPressure(NewMIs[1]);
1291
1292 // Otherwise we successfully unfolded a load that we can hoist.
1293
1294 // Update the call site info.
1295 if (MI->shouldUpdateCallSiteInfo())
1296 MF.eraseCallSiteInfo(MI);
1297
1298 MI->eraseFromParent();
1299 return NewMIs[0];
1300}
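//
// Illustrative example (assuming an x86-like target, not taken from this
// file): a folded-load instruction such as ADD32rm inside the loop can be
// unfolded into MOV32rm (the load) plus ADD32rr (the arithmetic); if the
// MOV32rm is loop invariant and profitable, it is the instruction returned
// here for hoisting while the ADD32rr remains in the loop.
//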
1301
1302/// Initialize the CSE map with instructions that are in the current loop
1303/// preheader that may become duplicates of instructions that are hoisted
1304/// out of the loop.
1305void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
1306 for (MachineInstr &MI : *BB)
1307 CSEMap[MI.getOpcode()].push_back(&MI);
1308}
1309
1310/// Find an instruction among PrevMIs that is a duplicate of MI.
1311/// Return this instruction if it's found.
1312MachineInstr *
1313MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
1314 std::vector<MachineInstr *> &PrevMIs) {
1315 for (MachineInstr *PrevMI : PrevMIs)
1316 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1317 return PrevMI;
1318
1319 return nullptr;
1320}
1321
1322/// Given a LICM'ed instruction, look for an instruction in the preheader
1323/// that computes the same value. If it's found, replace all uses of MI with
1324/// the definition of the existing instruction rather than hoisting the
1325/// instruction to the preheader.
1326bool MachineLICMBase::EliminateCSE(
1327 MachineInstr *MI,
1328 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1329 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1330 // the undef property onto uses.
1331 if (CI == CSEMap.end() || MI->isImplicitDef())
1332 return false;
1333
1334 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1335 LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1336
1337 // Replace virtual registers defined by MI by their counterparts defined
1338 // by Dup.
1339 SmallVector<unsigned, 2> Defs;
1340 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1341 const MachineOperand &MO = MI->getOperand(i);
1342
1343 // Physical registers may not differ here.
1344 assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
1345 MO.getReg() == Dup->getOperand(i).getReg()) &&
1346 "Instructions with different phys regs are not identical!");
1347
1348 if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
1349 Defs.push_back(i);
1350 }
1351
1352 SmallVector<const TargetRegisterClass *, 2> OrigRCs;
1353 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1354 unsigned Idx = Defs[i];
1355 Register Reg = MI->getOperand(Idx).getReg();
1356 Register DupReg = Dup->getOperand(Idx).getReg();
1357 OrigRCs.push_back(MRI->getRegClass(DupReg));
1358
1359 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1360 // Restore old RCs if more than one defs.
1361 for (unsigned j = 0; j != i; ++j)
1362 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1363 return false;
1364 }
1365 }
1366
1367 for (unsigned Idx : Defs) {
1368 Register Reg = MI->getOperand(Idx).getReg();
1369 Register DupReg = Dup->getOperand(Idx).getReg();
1370 MRI->replaceRegWith(Reg, DupReg);
1371 MRI->clearKillFlags(DupReg);
1372 // Clear Dup dead flag if any, we reuse it for Reg.
1373 if (!MRI->use_nodbg_empty(DupReg))
1374 Dup->getOperand(Idx).setIsDead(false);
1375 }
1376
1377 MI->eraseFromParent();
1378 ++NumCSEed;
1379 return true;
1380 }
1381 return false;
1382}
1383
1384/// Return true if the given instruction will be CSE'd if it's hoisted out of
1385/// the loop.
1386bool MachineLICMBase::MayCSE(MachineInstr *MI) {
1387 unsigned Opcode = MI->getOpcode();
1388 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1389 CSEMap.find(Opcode);
1390 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1391 // the undef property onto uses.
1392 if (CI == CSEMap.end() || MI->isImplicitDef())
1393 return false;
1394
1395 return LookForDuplicate(MI, CI->second) != nullptr;
1396}
1397
1398/// When an instruction is found to use only loop invariant operands
1399/// that are safe to hoist, this function is called to do the dirty work.
1400/// It returns true if the instruction is hoisted.
1401bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1402 MachineBasicBlock *SrcBlock = MI->getParent();
1403
1404 // Disable the instruction hoisting due to block hotness
1405 if ((DisableHoistingToHotterBlocks == UseBFI::All ||
1406 (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
1407 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1408 ++NumNotHoistedDueToHotness;
1409 return false;
1410 }
1411 // First check whether we should hoist this instruction.
1412 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1413 // If not, try unfolding a hoistable load.
1414 MI = ExtractHoistableLoad(MI);
1415 if (!MI) return false;
1416 }
1417
1418 // If we have hoisted an instruction that may store, it can only be a constant
1419 // store.
1420 if (MI->mayStore())
1421 NumStoreConst++;
1422
1423 // Now move the instruction to the predecessor, inserting it before any
1424 // terminator instructions.
1425 LLVM_DEBUG({
1426 dbgs() << "Hoisting " << *MI;
1427 if (MI->getParent()->getBasicBlock())
1428 dbgs() << " from " << printMBBReference(*MI->getParent());
1429 if (Preheader->getBasicBlock())
1430 dbgs() << " to " << printMBBReference(*Preheader);
1431 dbgs() << "\n";
1432 });
1433
1434 // If this is the first instruction being hoisted to the preheader,
1435 // initialize the CSE map with potential common expressions.
1436 if (FirstInLoop) {
1437 InitCSEMap(Preheader);
1438 FirstInLoop = false;
1439 }
1440
1441 // Look for opportunity to CSE the hoisted instruction.
1442 unsigned Opcode = MI->getOpcode();
1443 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator CI =
1444 CSEMap.find(Opcode);
1445 if (!EliminateCSE(MI, CI)) {
1446 // Otherwise, splice the instruction to the preheader.
1447 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1448
1449 // Since we are moving the instruction out of its basic block, we do not
1450 // retain its debug location. Doing so would degrade the debugging
1451 // experience and adversely affect the accuracy of profiling information.
1452 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
1453 MI->setDebugLoc(DebugLoc());
1454
1455 // Update register pressure for BBs from header to this block.
1456 UpdateBackTraceRegPressure(MI);
1457
1458 // Clear the kill flags of any register this instruction defines,
1459 // since they may need to be live throughout the entire loop
1460 // rather than just live for part of it.
1461 for (MachineOperand &MO : MI->all_defs())
1462 if (!MO.isDead())
1463 MRI->clearKillFlags(MO.getReg());
1464
1465 // Add to the CSE map.
1466 if (CI != CSEMap.end())
1467 CI->second.push_back(MI);
1468 else
1469 CSEMap[Opcode].push_back(MI);
1470 }
1471
1472 ++NumHoisted;
1473 Changed = true;
1474
1475 return true;
1476}
1477
1478/// Get the preheader for the current loop, splitting a critical edge if needed.
1479MachineBasicBlock *MachineLICMBase::getCurPreheader() {
1480 // Determine the block to which to hoist instructions. If we can't find a
1481 // suitable loop predecessor, we can't do any hoisting.
1482
1483 // If we've tried to get a preheader and failed, don't try again.
1484 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1485 return nullptr;
1486
1487 if (!CurPreheader) {
1488 CurPreheader = CurLoop->getLoopPreheader();
1489 if (!CurPreheader) {
1490 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1491 if (!Pred) {
1492 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1493 return nullptr;
1494 }
1495
1496 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1497 if (!CurPreheader) {
1498 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1499 return nullptr;
1500 }
1501 }
1502 }
1503 return CurPreheader;
1504}
1505
1506/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1507/// times hotter than the source basic block.
1508bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1509 MachineBasicBlock *TgtBlock) {
1510 // Parse source and target basic block frequency from MBFI
1511 uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1512 uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1513
1514 // Disable the hoisting if source block frequency is zero
1515 if (!SrcBF)
1516 return true;
1517
1518 double Ratio = (double)DstBF / SrcBF;
1519
1520 // Compare the block frequency ratio with the threshold
1521 return Ratio > BlockFrequencyRatioThreshold;
1522}
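//
// Worked example with made-up frequencies: with the default
// block-freq-ratio-threshold of 100, a source block frequency of 50 and a
// target (preheader) frequency of 6000 give a ratio of 6000 / 50 = 120, which
// exceeds the threshold, so Hoist() skips the instruction and bumps
// NumNotHoistedDueToHotness instead.
//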
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
basic Basic Alias true
This file implements the BitVector class.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:69
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Loop Invariant Code false early machinelicm
static cl::opt< bool > HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden)
static cl::opt< UseBFI > DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", "enable the feature when using profile data"), clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data")))
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
Machine Loop Invariant Code false early Early Machine Loop Invariant Code static false bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop)
Test if the given loop is the outer-most loop that has a unique predecessor.
UseBFI
Definition: MachineLICM.cpp:83
Machine Loop Invariant Code Motion
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI)
#define DEBUG_TYPE
Definition: MachineLICM.cpp:59
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
static bool isInvariantStore(const MachineInstr &MI, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI)
static cl::opt< unsigned > BlockFrequencyRatioThreshold("block-freq-ratio-threshold", cl::desc("Do not hoist instructions if target" "block is N times hotter than the source."), cl::init(100), cl::Hidden)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
TLS Variable Hoist
When an instruction is found to use only loop invariant operands that are safe to hoist,...
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
Definition: BitVector.h:719
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Base class for the actual dominator tree node.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:291
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool isAsCheapAsAMove(const MachineInstr &MI) const override
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
BlockT * getHeader() const
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
iterator end() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
iterator begin() const
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
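The loop queries above (getHeader, getLoopPreheader, getExitBlocks, getExitingBlocks) cover the topology a hoisting pass needs. A sketch of a hypothetical shape check, not the pass's actual policy:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

// Hypothetical helper: a loop is only worth looking at if it has a header, a
// preheader to hoist into, and at least one exit block.
static bool hasUsableTopology(const MachineLoop *L) {
  MachineBasicBlock *Header = L->getHeader();
  MachineBasicBlock *Preheader = L->getLoopPreheader(); // may be nullptr
  if (!Header || !Preheader)
    return false;

  SmallVector<MachineBasicBlock *, 8> ExitBlocks;
  L->getExitBlocks(ExitBlocks);
  return !ExitBlocks.empty();
}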
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, std::vector< SparseBitVector<> > *LiveInSets=nullptr)
Split the critical edge from this block to the given successor block, and return the newly created bl...
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
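getFirstTerminator and splice together form the mechanical part of hoisting: detach an instruction from a loop block and re-insert it in the preheader ahead of any branch. A minimal sketch of just that move; the real pass also updates register pressure, CSE tables, and call-site info:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Move MI from its current block into Preheader, before the preheader's
// terminators. Nothing here checks that the move is legal or profitable.
static void moveIntoPreheader(MachineBasicBlock *Preheader, MachineInstr *MI) {
  Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
}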
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getBlockFreq - Return block frequency.
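The getBlockFreq query above is what a hotness guard like the block-freq-ratio-threshold option needs; a hedged sketch of such a check (the default of 100 mirrors the option, the helper itself is illustrative):

#include <cstdint>
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
using namespace llvm;

// True if hoisting from SrcBB into TgtBB would place code in a block that is
// more than Threshold times hotter than where it currently lives.
static bool targetIsMuchHotter(const MachineBlockFrequencyInfo &MBFI,
                               const MachineBasicBlock *TgtBB,
                               const MachineBasicBlock *SrcBB,
                               uint64_t Threshold = 100) {
  uint64_t TgtFreq = MBFI.getBlockFreq(TgtBB).getFrequency();
  uint64_t SrcFreq = MBFI.getBlockFreq(SrcBB).getFrequency();
  return SrcFreq != 0 && TgtFreq > Threshold * SrcFreq;
}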
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
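A brief sketch of the dominator-tree queries above (getNode, dominates), phrased as the placement question a hoisting pass asks:

#include "llvm/CodeGen/MachineDominators.h"
using namespace llvm;

// True if every path to B passes through A, i.e. code placed in A executes
// before B on all paths.
static bool blockDominates(const MachineDominatorTree &MDT,
                           MachineBasicBlock *A, MachineBasicBlock *B) {
  return MDT.dominates(MDT.getNode(A), MDT.getNode(B));
}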
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
Representation of each machine instruction.
Definition: MachineInstr.h:68
iterator begin() const
iterator end() const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool isLoopInvariant(MachineInstr &I) const
Returns true if the instruction is loop invariant.
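Combining getLoopFor with isLoopInvariant gives the most basic candidate scan; this sketch deliberately ignores every safety and profitability check the real pass performs:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

// Collect the instructions in MBB whose inputs do not change inside the loop.
static void collectInvariantCandidates(const MachineLoopInfo &MLI,
                                       MachineBasicBlock &MBB,
                                       SmallVectorImpl<MachineInstr *> &Out) {
  MachineLoop *L = MLI.getLoopFor(&MBB);
  if (!L)
    return; // MBB is not inside any loop
  for (MachineInstr &MI : MBB)
    if (L->isLoopInvariant(MI))
      Out.push_back(&MI);
}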
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
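A toy predicate exercising the MachineOperand queries above (isReg, isImm, isImplicit, getReg); it is not the invariance test the pass itself uses:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
using namespace llvm;

// True if every operand of MI is an immediate or a register. Note that
// isImplicit()/getReg() may only be called on register operands.
static bool usesOnlyRegsAndImms(const MachineInstr &MI) {
  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isImm())
      continue;                 // literal operand
    if (MO.isReg()) {
      if (MO.isImplicit())
        continue;               // e.g. an implicitly clobbered flags register
      if (MO.getReg().isValid())
        continue;               // a real virtual or physical register
    }
    return false;               // anything else: memory, globals, ...
  }
  return true;
}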
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
Special value supplied for machine level alias analysis.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
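The Register queries above matter because the pre-regalloc and post-regalloc flavours of this pass see different register namespaces; asMCReg is the bridge to MC-level APIs. A small, hypothetical helper:

#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// Return the target's name for a physical register, or nullptr for virtual
// registers and $noreg.
static const char *physRegNameOrNull(Register R, const TargetRegisterInfo &TRI) {
  if (R.isVirtual())
    return nullptr;                  // virtual registers have no MC-level name
  if (R.isPhysical())
    return TRI.getName(R.asMCReg()); // convert before calling MC-level APIs
  return nullptr;                    // $noreg
}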
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
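A self-contained sketch of the SmallSet operations above (insert, count, clear); the element type and variable name are placeholders:

#include "llvm/ADT/SmallSet.h"
using namespace llvm;

void smallSetSketch() {
  SmallSet<int, 8> StoredFIs;              // e.g. frame indexes already visited
  bool Inserted = StoredFIs.insert(3).second;
  if (!Inserted) {
    // 3 was already in the set; insert() left it untouched.
  }
  if (StoredFIs.count(3))                  // 1 if present, 0 otherwise
    StoredFIs.clear();                     // drop all elements
}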
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
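The TargetSchedModel entries above are how a pass measures latency; long-latency instructions are attractive hoisting candidates. A sketch, assuming the model is queried per instruction:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
using namespace llvm;

// Build a scheduling model for MF's subtarget and ask for MI's latency.
static unsigned instrLatency(const MachineFunction &MF, const MachineInstr &MI) {
  TargetSchedModel SchedModel;
  SchedModel.init(&MF.getSubtarget());
  return SchedModel.computeInstrLatency(&MI);
}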
TargetSubtargetInfo - Generic base class for all target subtargets.
LLVM Value Representation.
Definition: Value.h:74
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
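make_early_inc_range is the idiom for mutating a block while walking it: the iterator is advanced before the body runs, so the current instruction may be erased or spliced away. The identity-copy cleanup below is only a stand-in example, not something this pass does:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Erase trivial COPYs whose source and destination are the same register.
static void eraseIdentityCopies(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
    if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg())
      MI.eraseFromParent(); // safe: the range already stepped past MI
}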
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeMachineLICMPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
char & MachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeEarlyMachineLICMPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...