LLVM 17.0.0git
MachineLICM.cpp
Go to the documentation of this file.
1//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs loop invariant code motion on machine instructions. We
10// attempt to remove as much code from the body of a loop as possible.
11//
12// This pass is not intended to be a replacement or a complete alternative
13// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
14// constructs that are not exposed before lowering and instruction selection.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/ADT/BitVector.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
42#include "llvm/IR/DebugLoc.h"
44#include "llvm/MC/MCInstrDesc.h"
45#include "llvm/MC/MCRegister.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
52#include <algorithm>
53#include <cassert>
54#include <limits>
55#include <vector>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "machinelicm"
60
61static cl::opt<bool>
62AvoidSpeculation("avoid-speculation",
63 cl::desc("MachineLICM should avoid speculation"),
64 cl::init(true), cl::Hidden);
65
66static cl::opt<bool>
67HoistCheapInsts("hoist-cheap-insts",
68 cl::desc("MachineLICM should hoist even cheap instructions"),
69 cl::init(false), cl::Hidden);
70
71static cl::opt<bool>
72HoistConstStores("hoist-const-stores",
73 cl::desc("Hoist invariant stores"),
74 cl::init(true), cl::Hidden);
75// The default threshold of 100 (i.e. if target block is 100 times hotter)
76// is based on empirical data on a single target and is subject to tuning.
78BlockFrequencyRatioThreshold("block-freq-ratio-threshold",
79 cl::desc("Do not hoist instructions if target"
80 "block is N times hotter than the source."),
81 cl::init(100), cl::Hidden);
82
83enum class UseBFI { None, PGO, All };
84
85static cl::opt<UseBFI>
86DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks",
87 cl::desc("Disable hoisting instructions to"
88 " hotter blocks"),
91 "disable the feature"),
93 "enable the feature when using profile data"),
95 "enable the feature with/wo profile data")));
96
97STATISTIC(NumHoisted,
98 "Number of machine instructions hoisted out of loops");
99STATISTIC(NumLowRP,
100 "Number of instructions hoisted in low reg pressure situation");
101STATISTIC(NumHighLatency,
102 "Number of high latency instructions hoisted");
103STATISTIC(NumCSEed,
104 "Number of hoisted machine instructions CSEed");
105STATISTIC(NumPostRAHoisted,
106 "Number of machine instructions hoisted out of loops post regalloc");
107STATISTIC(NumStoreConst,
108 "Number of stores of const phys reg hoisted out of loops");
109STATISTIC(NumNotHoistedDueToHotness,
110 "Number of instructions not hoisted due to block frequency");
111
112namespace {
113
114 class MachineLICMBase : public MachineFunctionPass {
115 const TargetInstrInfo *TII;
116 const TargetLoweringBase *TLI;
117 const TargetRegisterInfo *TRI;
118 const MachineFrameInfo *MFI;
120 TargetSchedModel SchedModel;
121 bool PreRegAlloc;
122 bool HasProfileData;
123
124 // Various analyses that we use...
125 AliasAnalysis *AA; // Alias analysis info.
126 MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info
127 MachineLoopInfo *MLI; // Current MachineLoopInfo
128 MachineDominatorTree *DT; // Machine dominator tree for the cur loop
129
130 // State that is updated as we process loops
131 bool Changed; // True if a loop is changed.
132 bool FirstInLoop; // True if it's the first LICM in the loop.
133 MachineLoop *CurLoop; // The current loop we are working on.
134 MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
135
136 // Exit blocks for CurLoop.
138
139 bool isExitBlock(const MachineBasicBlock *MBB) const {
140 return is_contained(ExitBlocks, MBB);
141 }
142
143 // Track 'estimated' register pressure.
146
147 // Register pressure "limit" per register pressure set. If the pressure
148 // is higher than the limit, then it's considered high.
150
151 // Register pressure on path leading from loop preheader to current BB.
153
154 // For each opcode, keep a list of potential CSE instructions.
156
157 enum {
158 SpeculateFalse = 0,
159 SpeculateTrue = 1,
160 SpeculateUnknown = 2
161 };
162
163 // If a MBB does not dominate loop exiting blocks then it may not safe
164 // to hoist loads from this block.
165 // Tri-state: 0 - false, 1 - true, 2 - unknown
166 unsigned SpeculationState;
167
168 public:
169 MachineLICMBase(char &PassID, bool PreRegAlloc)
170 : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
171
172 bool runOnMachineFunction(MachineFunction &MF) override;
173
174 void getAnalysisUsage(AnalysisUsage &AU) const override {
176 if (DisableHoistingToHotterBlocks != UseBFI::None)
182 }
183
184 void releaseMemory() override {
185 RegSeen.clear();
186 RegPressure.clear();
187 RegLimit.clear();
188 BackTrace.clear();
189 CSEMap.clear();
190 }
191
192 private:
193 /// Keep track of information about hoisting candidates.
194 struct CandidateInfo {
196 unsigned Def;
197 int FI;
198
199 CandidateInfo(MachineInstr *mi, unsigned def, int fi)
200 : MI(mi), Def(def), FI(fi) {}
201 };
202
203 void HoistRegionPostRA();
204
205 void HoistPostRA(MachineInstr *MI, unsigned Def);
206
207 void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
208 BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
210
211 void AddToLiveIns(MCRegister Reg);
212
213 bool IsLICMCandidate(MachineInstr &I);
214
215 bool IsLoopInvariantInst(MachineInstr &I);
216
217 bool HasLoopPHIUse(const MachineInstr *MI) const;
218
219 bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
220 Register Reg) const;
221
222 bool IsCheapInstruction(MachineInstr &MI) const;
223
224 bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
225 bool Cheap);
226
227 void UpdateBackTraceRegPressure(const MachineInstr *MI);
228
229 bool IsProfitableToHoist(MachineInstr &MI);
230
231 bool IsGuaranteedToExecute(MachineBasicBlock *BB);
232
233 bool isTriviallyReMaterializable(const MachineInstr &MI) const;
234
235 void EnterScope(MachineBasicBlock *MBB);
236
237 void ExitScope(MachineBasicBlock *MBB);
238
239 void ExitScopeIfDone(
243
244 void HoistOutOfLoop(MachineDomTreeNode *HeaderN);
245
246 void InitRegPressure(MachineBasicBlock *BB);
247
248 DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
249 bool ConsiderSeen,
250 bool ConsiderUnseenAsDef);
251
252 void UpdateRegPressure(const MachineInstr *MI,
253 bool ConsiderUnseenAsDef = false);
254
255 MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
256
257 MachineInstr *LookForDuplicate(const MachineInstr *MI,
258 std::vector<MachineInstr *> &PrevMIs);
259
260 bool
261 EliminateCSE(MachineInstr *MI,
262 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI);
263
264 bool MayCSE(MachineInstr *MI);
265
266 bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
267
268 void InitCSEMap(MachineBasicBlock *BB);
269
270 bool isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
271 MachineBasicBlock *TgtBlock);
272 MachineBasicBlock *getCurPreheader();
273 };
274
275 class MachineLICM : public MachineLICMBase {
276 public:
277 static char ID;
278 MachineLICM() : MachineLICMBase(ID, false) {
280 }
281 };
282
283 class EarlyMachineLICM : public MachineLICMBase {
284 public:
285 static char ID;
286 EarlyMachineLICM() : MachineLICMBase(ID, true) {
288 }
289 };
290
291} // end anonymous namespace
292
293char MachineLICM::ID;
294char EarlyMachineLICM::ID;
295
296char &llvm::MachineLICMID = MachineLICM::ID;
297char &llvm::EarlyMachineLICMID = EarlyMachineLICM::ID;
298
300 "Machine Loop Invariant Code Motion", false, false)
306 "Machine Loop Invariant Code Motion", false, false)
307
308INITIALIZE_PASS_BEGIN(EarlyMachineLICM, "early-machinelicm",
309 "Early Machine Loop Invariant Code Motion", false, false)
314INITIALIZE_PASS_END(EarlyMachineLICM, "early-machinelicm",
315 "Early Machine Loop Invariant Code Motion", false, false)
316
317/// Test if the given loop is the outer-most loop that has a unique predecessor.
319 // Check whether this loop even has a unique predecessor.
320 if (!CurLoop->getLoopPredecessor())
321 return false;
322 // Ok, now check to see if any of its outer loops do.
323 for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
324 if (L->getLoopPredecessor())
325 return false;
326 // None of them did, so this is the outermost with a unique predecessor.
327 return true;
328}
329
330bool MachineLICMBase::runOnMachineFunction(MachineFunction &MF) {
331 if (skipFunction(MF.getFunction()))
332 return false;
333
334 Changed = FirstInLoop = false;
335 const TargetSubtargetInfo &ST = MF.getSubtarget();
336 TII = ST.getInstrInfo();
337 TLI = ST.getTargetLowering();
338 TRI = ST.getRegisterInfo();
339 MFI = &MF.getFrameInfo();
340 MRI = &MF.getRegInfo();
341 SchedModel.init(&ST);
342
343 PreRegAlloc = MRI->isSSA();
344 HasProfileData = MF.getFunction().hasProfileData();
345
346 if (PreRegAlloc)
347 LLVM_DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
348 else
349 LLVM_DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
350 LLVM_DEBUG(dbgs() << MF.getName() << " ********\n");
351
352 if (PreRegAlloc) {
353 // Estimate register pressure during pre-regalloc pass.
354 unsigned NumRPS = TRI->getNumRegPressureSets();
355 RegPressure.resize(NumRPS);
356 std::fill(RegPressure.begin(), RegPressure.end(), 0);
357 RegLimit.resize(NumRPS);
358 for (unsigned i = 0, e = NumRPS; i != e; ++i)
359 RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
360 }
361
362 // Get our Loop information...
364 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
365 MLI = &getAnalysis<MachineLoopInfo>();
366 DT = &getAnalysis<MachineDominatorTree>();
367 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
368
369 SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
370 while (!Worklist.empty()) {
371 CurLoop = Worklist.pop_back_val();
372 CurPreheader = nullptr;
373 ExitBlocks.clear();
374
375 // If this is done before regalloc, only visit outer-most preheader-sporting
376 // loops.
377 if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
378 Worklist.append(CurLoop->begin(), CurLoop->end());
379 continue;
380 }
381
382 CurLoop->getExitBlocks(ExitBlocks);
383
384 if (!PreRegAlloc)
385 HoistRegionPostRA();
386 else {
387 // CSEMap is initialized for loop header when the first instruction is
388 // being hoisted.
389 MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
390 FirstInLoop = true;
391 HoistOutOfLoop(N);
392 CSEMap.clear();
393 }
394 }
395
396 return Changed;
397}
398
399/// Return true if instruction stores to the specified frame.
400static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
401 // Check mayStore before memory operands so that e.g. DBG_VALUEs will return
402 // true since they have no memory operands.
403 if (!MI->mayStore())
404 return false;
405 // If we lost memory operands, conservatively assume that the instruction
406 // writes to all slots.
407 if (MI->memoperands_empty())
408 return true;
409 for (const MachineMemOperand *MemOp : MI->memoperands()) {
410 if (!MemOp->isStore() || !MemOp->getPseudoValue())
411 continue;
413 dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
414 if (Value->getFrameIndex() == FI)
415 return true;
416 }
417 }
418 return false;
419}
420
421/// Examine the instruction for potentai LICM candidate. Also
422/// gather register def and frame object update information.
423void MachineLICMBase::ProcessMI(MachineInstr *MI,
424 BitVector &PhysRegDefs,
425 BitVector &PhysRegClobbers,
426 SmallSet<int, 32> &StoredFIs,
427 SmallVectorImpl<CandidateInfo> &Candidates) {
428 bool RuledOut = false;
429 bool HasNonInvariantUse = false;
430 unsigned Def = 0;
431 for (const MachineOperand &MO : MI->operands()) {
432 if (MO.isFI()) {
433 // Remember if the instruction stores to the frame index.
434 int FI = MO.getIndex();
435 if (!StoredFIs.count(FI) &&
436 MFI->isSpillSlotObjectIndex(FI) &&
438 StoredFIs.insert(FI);
439 HasNonInvariantUse = true;
440 continue;
441 }
442
443 // We can't hoist an instruction defining a physreg that is clobbered in
444 // the loop.
445 if (MO.isRegMask()) {
446 PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
447 continue;
448 }
449
450 if (!MO.isReg())
451 continue;
452 Register Reg = MO.getReg();
453 if (!Reg)
454 continue;
455 assert(Reg.isPhysical() && "Not expecting virtual register!");
456
457 if (!MO.isDef()) {
458 if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
459 // If it's using a non-loop-invariant register, then it's obviously not
460 // safe to hoist.
461 HasNonInvariantUse = true;
462 continue;
463 }
464
465 if (MO.isImplicit()) {
466 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
467 PhysRegClobbers.set(*AI);
468 if (!MO.isDead())
469 // Non-dead implicit def? This cannot be hoisted.
470 RuledOut = true;
471 // No need to check if a dead implicit def is also defined by
472 // another instruction.
473 continue;
474 }
475
476 // FIXME: For now, avoid instructions with multiple defs, unless
477 // it's a dead implicit def.
478 if (Def)
479 RuledOut = true;
480 else
481 Def = Reg;
482
483 // If we have already seen another instruction that defines the same
484 // register, then this is not safe. Two defs is indicated by setting a
485 // PhysRegClobbers bit.
486 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
487 if (PhysRegDefs.test(*AS))
488 PhysRegClobbers.set(*AS);
489 }
490 // Need a second loop because MCRegAliasIterator can visit the same
491 // register twice.
492 for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS)
493 PhysRegDefs.set(*AS);
494
495 if (PhysRegClobbers.test(Reg))
496 // MI defined register is seen defined by another instruction in
497 // the loop, it cannot be a LICM candidate.
498 RuledOut = true;
499 }
500
501 // Only consider reloads for now and remats which do not have register
502 // operands. FIXME: Consider unfold load folding instructions.
503 if (Def && !RuledOut) {
504 int FI = std::numeric_limits<int>::min();
505 if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
507 Candidates.push_back(CandidateInfo(MI, Def, FI));
508 }
509}
510
511/// Walk the specified region of the CFG and hoist loop invariants out to the
512/// preheader.
513void MachineLICMBase::HoistRegionPostRA() {
514 MachineBasicBlock *Preheader = getCurPreheader();
515 if (!Preheader)
516 return;
517
518 unsigned NumRegs = TRI->getNumRegs();
519 BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
520 BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
521
523 SmallSet<int, 32> StoredFIs;
524
525 // Walk the entire region, count number of defs for each register, and
526 // collect potential LICM candidates.
527 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
528 // If the header of the loop containing this basic block is a landing pad,
529 // then don't try to hoist instructions out of this loop.
530 const MachineLoop *ML = MLI->getLoopFor(BB);
531 if (ML && ML->getHeader()->isEHPad()) continue;
532
533 // Conservatively treat live-in's as an external def.
534 // FIXME: That means a reload that're reused in successor block(s) will not
535 // be LICM'ed.
536 for (const auto &LI : BB->liveins()) {
537 for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
538 PhysRegDefs.set(*AI);
539 }
540
541 SpeculationState = SpeculateUnknown;
542 for (MachineInstr &MI : *BB)
543 ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
544 }
545
546 // Gather the registers read / clobbered by the terminator.
547 BitVector TermRegs(NumRegs);
549 if (TI != Preheader->end()) {
550 for (const MachineOperand &MO : TI->operands()) {
551 if (!MO.isReg())
552 continue;
553 Register Reg = MO.getReg();
554 if (!Reg)
555 continue;
556 for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
557 TermRegs.set(*AI);
558 }
559 }
560
561 // Now evaluate whether the potential candidates qualify.
562 // 1. Check if the candidate defined register is defined by another
563 // instruction in the loop.
564 // 2. If the candidate is a load from stack slot (always true for now),
565 // check if the slot is stored anywhere in the loop.
566 // 3. Make sure candidate def should not clobber
567 // registers read by the terminator. Similarly its def should not be
568 // clobbered by the terminator.
569 for (CandidateInfo &Candidate : Candidates) {
570 if (Candidate.FI != std::numeric_limits<int>::min() &&
571 StoredFIs.count(Candidate.FI))
572 continue;
573
574 unsigned Def = Candidate.Def;
575 if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
576 bool Safe = true;
577 MachineInstr *MI = Candidate.MI;
578 for (const MachineOperand &MO : MI->operands()) {
579 if (!MO.isReg() || MO.isDef() || !MO.getReg())
580 continue;
581 Register Reg = MO.getReg();
582 if (PhysRegDefs.test(Reg) ||
583 PhysRegClobbers.test(Reg)) {
584 // If it's using a non-loop-invariant register, then it's obviously
585 // not safe to hoist.
586 Safe = false;
587 break;
588 }
589 }
590 if (Safe)
591 HoistPostRA(MI, Candidate.Def);
592 }
593 }
594}
595
596/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
597/// sure it is not killed by any instructions in the loop.
598void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
599 for (MachineBasicBlock *BB : CurLoop->getBlocks()) {
600 if (!BB->isLiveIn(Reg))
601 BB->addLiveIn(Reg);
602 for (MachineInstr &MI : *BB) {
603 for (MachineOperand &MO : MI.operands()) {
604 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
605 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
606 MO.setIsKill(false);
607 }
608 }
609 }
610}
611
612/// When an instruction is found to only use loop invariant operands that is
613/// safe to hoist, this instruction is called to do the dirty work.
614void MachineLICMBase::HoistPostRA(MachineInstr *MI, unsigned Def) {
615 MachineBasicBlock *Preheader = getCurPreheader();
616
617 // Now move the instructions to the predecessor, inserting it before any
618 // terminator instructions.
619 LLVM_DEBUG(dbgs() << "Hoisting to " << printMBBReference(*Preheader)
620 << " from " << printMBBReference(*MI->getParent()) << ": "
621 << *MI);
622
623 // Splice the instruction to the preheader.
624 MachineBasicBlock *MBB = MI->getParent();
625 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
626
627 // Since we are moving the instruction out of its basic block, we do not
628 // retain its debug location. Doing so would degrade the debugging
629 // experience and adversely affect the accuracy of profiling information.
630 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
631 MI->setDebugLoc(DebugLoc());
632
633 // Add register to livein list to all the BBs in the current loop since a
634 // loop invariant must be kept live throughout the whole loop. This is
635 // important to ensure later passes do not scavenge the def register.
636 AddToLiveIns(Def);
637
638 ++NumPostRAHoisted;
639 Changed = true;
640}
641
642/// Check if this mbb is guaranteed to execute. If not then a load from this mbb
643/// may not be safe to hoist.
644bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
645 if (SpeculationState != SpeculateUnknown)
646 return SpeculationState == SpeculateFalse;
647
648 if (BB != CurLoop->getHeader()) {
649 // Check loop exiting blocks.
650 SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
651 CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
652 for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
653 if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
654 SpeculationState = SpeculateTrue;
655 return false;
656 }
657 }
658
659 SpeculationState = SpeculateFalse;
660 return true;
661}
662
663/// Check if \p MI is trivially remateralizable and if it does not have any
664/// virtual register uses. Even though rematerializable RA might not actually
665/// rematerialize it in this scenario. In that case we do not want to hoist such
666/// instruction out of the loop in a belief RA will sink it back if needed.
667bool MachineLICMBase::isTriviallyReMaterializable(
668 const MachineInstr &MI) const {
669 if (!TII->isTriviallyReMaterializable(MI))
670 return false;
671
672 for (const MachineOperand &MO : MI.operands()) {
673 if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
674 return false;
675 }
676
677 return true;
678}
679
680void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
681 LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
682
683 // Remember livein register pressure.
684 BackTrace.push_back(RegPressure);
685}
686
687void MachineLICMBase::ExitScope(MachineBasicBlock *MBB) {
688 LLVM_DEBUG(dbgs() << "Exiting " << printMBBReference(*MBB) << '\n');
689 BackTrace.pop_back();
690}
691
692/// Destroy scope for the MBB that corresponds to the given dominator tree node
693/// if its a leaf or all of its children are done. Walk up the dominator tree to
694/// destroy ancestors which are now done.
695void MachineLICMBase::ExitScopeIfDone(MachineDomTreeNode *Node,
698 if (OpenChildren[Node])
699 return;
700
701 for(;;) {
702 ExitScope(Node->getBlock());
703 // Now traverse upwards to pop ancestors whose offsprings are all done.
704 MachineDomTreeNode *Parent = ParentMap.lookup(Node);
705 if (!Parent || --OpenChildren[Parent] != 0)
706 break;
707 Node = Parent;
708 }
709}
710
711/// Walk the specified loop in the CFG (defined by all blocks dominated by the
712/// specified header block, and that are in the current loop) in depth first
713/// order w.r.t the DominatorTree. This allows us to visit definitions before
714/// uses, allowing us to hoist a loop body in one pass without iteration.
715void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
716 MachineBasicBlock *Preheader = getCurPreheader();
717 if (!Preheader)
718 return;
719
724
725 // Perform a DFS walk to determine the order of visit.
726 WorkList.push_back(HeaderN);
727 while (!WorkList.empty()) {
729 assert(Node && "Null dominator tree node?");
730 MachineBasicBlock *BB = Node->getBlock();
731
732 // If the header of the loop containing this basic block is a landing pad,
733 // then don't try to hoist instructions out of this loop.
734 const MachineLoop *ML = MLI->getLoopFor(BB);
735 if (ML && ML->getHeader()->isEHPad())
736 continue;
737
738 // If this subregion is not in the top level loop at all, exit.
739 if (!CurLoop->contains(BB))
740 continue;
741
742 Scopes.push_back(Node);
743 unsigned NumChildren = Node->getNumChildren();
744
745 // Don't hoist things out of a large switch statement. This often causes
746 // code to be hoisted that wasn't going to be executed, and increases
747 // register pressure in a situation where it's likely to matter.
748 if (BB->succ_size() >= 25)
749 NumChildren = 0;
750
751 OpenChildren[Node] = NumChildren;
752 if (NumChildren) {
753 // Add children in reverse order as then the next popped worklist node is
754 // the first child of this node. This means we ultimately traverse the
755 // DOM tree in exactly the same order as if we'd recursed.
756 for (MachineDomTreeNode *Child : reverse(Node->children())) {
757 ParentMap[Child] = Node;
758 WorkList.push_back(Child);
759 }
760 }
761 }
762
763 if (Scopes.size() == 0)
764 return;
765
766 // Compute registers which are livein into the loop headers.
767 RegSeen.clear();
768 BackTrace.clear();
769 InitRegPressure(Preheader);
770
771 // Now perform LICM.
772 for (MachineDomTreeNode *Node : Scopes) {
773 MachineBasicBlock *MBB = Node->getBlock();
774
775 EnterScope(MBB);
776
777 // Process the block
778 SpeculationState = SpeculateUnknown;
780 if (!Hoist(&MI, Preheader))
781 UpdateRegPressure(&MI);
782 // If we have hoisted an instruction that may store, it can only be a
783 // constant store.
784 }
785
786 // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
787 ExitScopeIfDone(Node, OpenChildren, ParentMap);
788 }
789}
790
792 return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
793}
794
795/// Find all virtual register references that are liveout of the preheader to
796/// initialize the starting "register pressure". Note this does not count live
797/// through (livein but not used) registers.
798void MachineLICMBase::InitRegPressure(MachineBasicBlock *BB) {
799 std::fill(RegPressure.begin(), RegPressure.end(), 0);
800
801 // If the preheader has only a single predecessor and it ends with a
802 // fallthrough or an unconditional branch, then scan its predecessor for live
803 // defs as well. This happens whenever the preheader is created by splitting
804 // the critical edge from the loop predecessor to the loop header.
805 if (BB->pred_size() == 1) {
806 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
808 if (!TII->analyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
809 InitRegPressure(*BB->pred_begin());
810 }
811
812 for (const MachineInstr &MI : *BB)
813 UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
814}
815
816/// Update estimate of register pressure after the specified instruction.
817void MachineLICMBase::UpdateRegPressure(const MachineInstr *MI,
818 bool ConsiderUnseenAsDef) {
819 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
820 for (const auto &RPIdAndCost : Cost) {
821 unsigned Class = RPIdAndCost.first;
822 if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
823 RegPressure[Class] = 0;
824 else
825 RegPressure[Class] += RPIdAndCost.second;
826 }
827}
828
829/// Calculate the additional register pressure that the registers used in MI
830/// cause.
831///
832/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
833/// figure out which usages are live-ins.
834/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
836MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
837 bool ConsiderUnseenAsDef) {
839 if (MI->isImplicitDef())
840 return Cost;
841 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
842 const MachineOperand &MO = MI->getOperand(i);
843 if (!MO.isReg() || MO.isImplicit())
844 continue;
845 Register Reg = MO.getReg();
846 if (!Reg.isVirtual())
847 continue;
848
849 // FIXME: It seems bad to use RegSeen only for some of these calculations.
850 bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
851 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
852
853 RegClassWeight W = TRI->getRegClassWeight(RC);
854 int RCCost = 0;
855 if (MO.isDef())
856 RCCost = W.RegWeight;
857 else {
858 bool isKill = isOperandKill(MO, MRI);
859 if (isNew && !isKill && ConsiderUnseenAsDef)
860 // Haven't seen this, it must be a livein.
861 RCCost = W.RegWeight;
862 else if (!isNew && isKill)
863 RCCost = -W.RegWeight;
864 }
865 if (RCCost == 0)
866 continue;
867 const int *PS = TRI->getRegClassPressureSets(RC);
868 for (; *PS != -1; ++PS) {
869 if (!Cost.contains(*PS))
870 Cost[*PS] = RCCost;
871 else
872 Cost[*PS] += RCCost;
873 }
874 }
875 return Cost;
876}
877
878/// Return true if this machine instruction loads from global offset table or
879/// constant pool.
881 assert(MI.mayLoad() && "Expected MI that loads!");
882
883 // If we lost memory operands, conservatively assume that the instruction
884 // reads from everything..
885 if (MI.memoperands_empty())
886 return true;
887
888 for (MachineMemOperand *MemOp : MI.memoperands())
889 if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
890 if (PSV->isGOT() || PSV->isConstantPool())
891 return true;
892
893 return false;
894}
895
896// This function iterates through all the operands of the input store MI and
897// checks that each register operand statisfies isCallerPreservedPhysReg.
898// This means, the value being stored and the address where it is being stored
899// is constant throughout the body of the function (not including prologue and
900// epilogue). When called with an MI that isn't a store, it returns false.
901// A future improvement can be to check if the store registers are constant
902// throughout the loop rather than throughout the funtion.
904 const TargetRegisterInfo *TRI,
905 const MachineRegisterInfo *MRI) {
906
907 bool FoundCallerPresReg = false;
908 if (!MI.mayStore() || MI.hasUnmodeledSideEffects() ||
909 (MI.getNumOperands() == 0))
910 return false;
911
912 // Check that all register operands are caller-preserved physical registers.
913 for (const MachineOperand &MO : MI.operands()) {
914 if (MO.isReg()) {
915 Register Reg = MO.getReg();
916 // If operand is a virtual register, check if it comes from a copy of a
917 // physical register.
918 if (Reg.isVirtual())
919 Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
920 if (Reg.isVirtual())
921 return false;
922 if (!TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *MI.getMF()))
923 return false;
924 else
925 FoundCallerPresReg = true;
926 } else if (!MO.isImm()) {
927 return false;
928 }
929 }
930 return FoundCallerPresReg;
931}
932
933// Return true if the input MI is a copy instruction that feeds an invariant
934// store instruction. This means that the src of the copy has to satisfy
935// isCallerPreservedPhysReg and atleast one of it's users should satisfy
936// isInvariantStore.
939 const TargetRegisterInfo *TRI) {
940
941 // FIXME: If targets would like to look through instructions that aren't
942 // pure copies, this can be updated to a query.
943 if (!MI.isCopy())
944 return false;
945
946 const MachineFunction *MF = MI.getMF();
947 // Check that we are copying a constant physical register.
948 Register CopySrcReg = MI.getOperand(1).getReg();
949 if (CopySrcReg.isVirtual())
950 return false;
951
952 if (!TRI->isCallerPreservedPhysReg(CopySrcReg.asMCReg(), *MF))
953 return false;
954
955 Register CopyDstReg = MI.getOperand(0).getReg();
956 // Check if any of the uses of the copy are invariant stores.
957 assert(CopyDstReg.isVirtual() && "copy dst is not a virtual reg");
958
959 for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
960 if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
961 return true;
962 }
963 return false;
964}
965
966/// Returns true if the instruction may be a suitable candidate for LICM.
967/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
968bool MachineLICMBase::IsLICMCandidate(MachineInstr &I) {
969 // Check if it's safe to move the instruction.
970 bool DontMoveAcrossStore = true;
971 if ((!I.isSafeToMove(AA, DontMoveAcrossStore)) &&
973 LLVM_DEBUG(dbgs() << "LICM: Instruction not safe to move.\n");
974 return false;
975 }
976
977 // If it is a load then check if it is guaranteed to execute by making sure
978 // that it dominates all exiting blocks. If it doesn't, then there is a path
979 // out of the loop which does not execute this load, so we can't hoist it.
980 // Loads from constant memory are safe to speculate, for example indexed load
981 // from a jump table.
982 // Stores and side effects are already checked by isSafeToMove.
983 if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) &&
984 !IsGuaranteedToExecute(I.getParent())) {
985 LLVM_DEBUG(dbgs() << "LICM: Load not guaranteed to execute.\n");
986 return false;
987 }
988
989 // Convergent attribute has been used on operations that involve inter-thread
990 // communication which results are implicitly affected by the enclosing
991 // control flows. It is not safe to hoist or sink such operations across
992 // control flow.
993 if (I.isConvergent())
994 return false;
995
996 if (!TII->shouldHoist(I, CurLoop))
997 return false;
998
999 return true;
1000}
1001
1002/// Returns true if the instruction is loop invariant.
1003bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
1004 if (!IsLICMCandidate(I)) {
1005 LLVM_DEBUG(dbgs() << "LICM: Instruction not a LICM candidate\n");
1006 return false;
1007 }
1008 return CurLoop->isLoopInvariant(I);
1009}
1010
1011/// Return true if the specified instruction is used by a phi node and hoisting
1012/// it could cause a copy to be inserted.
1013bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
1015 do {
1016 MI = Work.pop_back_val();
1017 for (const MachineOperand &MO : MI->operands()) {
1018 if (!MO.isReg() || !MO.isDef())
1019 continue;
1020 Register Reg = MO.getReg();
1021 if (!Reg.isVirtual())
1022 continue;
1023 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
1024 // A PHI may cause a copy to be inserted.
1025 if (UseMI.isPHI()) {
1026 // A PHI inside the loop causes a copy because the live range of Reg is
1027 // extended across the PHI.
1028 if (CurLoop->contains(&UseMI))
1029 return true;
1030 // A PHI in an exit block can cause a copy to be inserted if the PHI
1031 // has multiple predecessors in the loop with different values.
1032 // For now, approximate by rejecting all exit blocks.
1033 if (isExitBlock(UseMI.getParent()))
1034 return true;
1035 continue;
1036 }
1037 // Look past copies as well.
1038 if (UseMI.isCopy() && CurLoop->contains(&UseMI))
1039 Work.push_back(&UseMI);
1040 }
1041 }
1042 } while (!Work.empty());
1043 return false;
1044}
1045
1046/// Compute operand latency between a def of 'Reg' and an use in the current
1047/// loop, return true if the target considered it high.
1048bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
1049 Register Reg) const {
1050 if (MRI->use_nodbg_empty(Reg))
1051 return false;
1052
1053 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
1054 if (UseMI.isCopyLike())
1055 continue;
1056 if (!CurLoop->contains(UseMI.getParent()))
1057 continue;
1058 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
1059 const MachineOperand &MO = UseMI.getOperand(i);
1060 if (!MO.isReg() || !MO.isUse())
1061 continue;
1062 Register MOReg = MO.getReg();
1063 if (MOReg != Reg)
1064 continue;
1065
1066 if (TII->hasHighOperandLatency(SchedModel, MRI, MI, DefIdx, UseMI, i))
1067 return true;
1068 }
1069
1070 // Only look at the first in loop use.
1071 break;
1072 }
1073
1074 return false;
1075}
1076
1077/// Return true if the instruction is marked "cheap" or the operand latency
1078/// between its def and a use is one or less.
1079bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
1080 if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1081 return true;
1082
1083 bool isCheap = false;
1084 unsigned NumDefs = MI.getDesc().getNumDefs();
1085 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
1086 MachineOperand &DefMO = MI.getOperand(i);
1087 if (!DefMO.isReg() || !DefMO.isDef())
1088 continue;
1089 --NumDefs;
1090 Register Reg = DefMO.getReg();
1091 if (Reg.isPhysical())
1092 continue;
1093
1094 if (!TII->hasLowDefLatency(SchedModel, MI, i))
1095 return false;
1096 isCheap = true;
1097 }
1098
1099 return isCheap;
1100}
1101
1102/// Visit BBs from header to current BB, check if hoisting an instruction of the
1103/// given cost matrix can cause high register pressure.
1104bool
1105MachineLICMBase::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
1106 bool CheapInstr) {
1107 for (const auto &RPIdAndCost : Cost) {
1108 if (RPIdAndCost.second <= 0)
1109 continue;
1110
1111 unsigned Class = RPIdAndCost.first;
1112 int Limit = RegLimit[Class];
1113
1114 // Don't hoist cheap instructions if they would increase register pressure,
1115 // even if we're under the limit.
1116 if (CheapInstr && !HoistCheapInsts)
1117 return true;
1118
1119 for (const auto &RP : BackTrace)
1120 if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
1121 return true;
1122 }
1123
1124 return false;
1125}
1126
1127/// Traverse the back trace from header to the current block and update their
1128/// register pressures to reflect the effect of hoisting MI from the current
1129/// block to the preheader.
1130void MachineLICMBase::UpdateBackTraceRegPressure(const MachineInstr *MI) {
1131 // First compute the 'cost' of the instruction, i.e. its contribution
1132 // to register pressure.
1133 auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
1134 /*ConsiderUnseenAsDef=*/false);
1135
1136 // Update register pressure of blocks from loop header to current block.
1137 for (auto &RP : BackTrace)
1138 for (const auto &RPIdAndCost : Cost)
1139 RP[RPIdAndCost.first] += RPIdAndCost.second;
1140}
1141
1142/// Return true if it is potentially profitable to hoist the given loop
1143/// invariant.
1144bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
1145 if (MI.isImplicitDef())
1146 return true;
1147
1148 // Besides removing computation from the loop, hoisting an instruction has
1149 // these effects:
1150 //
1151 // - The value defined by the instruction becomes live across the entire
1152 // loop. This increases register pressure in the loop.
1153 //
1154 // - If the value is used by a PHI in the loop, a copy will be required for
1155 // lowering the PHI after extending the live range.
1156 //
1157 // - When hoisting the last use of a value in the loop, that value no longer
1158 // needs to be live in the loop. This lowers register pressure in the loop.
1159
1161 return true;
1162
1163 bool CheapInstr = IsCheapInstruction(MI);
1164 bool CreatesCopy = HasLoopPHIUse(&MI);
1165
1166 // Don't hoist a cheap instruction if it would create a copy in the loop.
1167 if (CheapInstr && CreatesCopy) {
1168 LLVM_DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
1169 return false;
1170 }
1171
1172 // Rematerializable instructions should always be hoisted providing the
1173 // register allocator can just pull them down again when needed.
1174 if (isTriviallyReMaterializable(MI))
1175 return true;
1176
1177 // FIXME: If there are long latency loop-invariant instructions inside the
1178 // loop at this point, why didn't the optimizer's LICM hoist them?
1179 for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
1180 const MachineOperand &MO = MI.getOperand(i);
1181 if (!MO.isReg() || MO.isImplicit())
1182 continue;
1183 Register Reg = MO.getReg();
1184 if (!Reg.isVirtual())
1185 continue;
1186 if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
1187 LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
1188 ++NumHighLatency;
1189 return true;
1190 }
1191 }
1192
1193 // Estimate register pressure to determine whether to LICM the instruction.
1194 // In low register pressure situation, we can be more aggressive about
1195 // hoisting. Also, favors hoisting long latency instructions even in
1196 // moderately high pressure situation.
1197 // Cheap instructions will only be hoisted if they don't increase register
1198 // pressure at all.
1199 auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
1200 /*ConsiderUnseenAsDef=*/false);
1201
1202 // Visit BBs from header to current BB, if hoisting this doesn't cause
1203 // high register pressure, then it's safe to proceed.
1204 if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
1205 LLVM_DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
1206 ++NumLowRP;
1207 return true;
1208 }
1209
1210 // Don't risk increasing register pressure if it would create copies.
1211 if (CreatesCopy) {
1212 LLVM_DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
1213 return false;
1214 }
1215
1216 // Do not "speculate" in high register pressure situation. If an
1217 // instruction is not guaranteed to be executed in the loop, it's best to be
1218 // conservative.
1219 if (AvoidSpeculation &&
1220 (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
1221 LLVM_DEBUG(dbgs() << "Won't speculate: " << MI);
1222 return false;
1223 }
1224
1225 // High register pressure situation, only hoist if the instruction is going
1226 // to be remat'ed.
1227 if (!isTriviallyReMaterializable(MI) &&
1228 !MI.isDereferenceableInvariantLoad()) {
1229 LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
1230 return false;
1231 }
1232
1233 return true;
1234}
1235
1236/// Unfold a load from the given machineinstr if the load itself could be
1237/// hoisted. Return the unfolded and hoistable load, or null if the load
1238/// couldn't be unfolded or if it wouldn't be hoistable.
1239MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
1240 // Don't unfold simple loads.
1241 if (MI->canFoldAsLoad())
1242 return nullptr;
1243
1244 // If not, we may be able to unfold a load and hoist that.
1245 // First test whether the instruction is loading from an amenable
1246 // memory location.
1247 if (!MI->isDereferenceableInvariantLoad())
1248 return nullptr;
1249
1250 // Next determine the register class for a temporary register.
1251 unsigned LoadRegIndex;
1252 unsigned NewOpc =
1253 TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
1254 /*UnfoldLoad=*/true,
1255 /*UnfoldStore=*/false,
1256 &LoadRegIndex);
1257 if (NewOpc == 0) return nullptr;
1258 const MCInstrDesc &MID = TII->get(NewOpc);
1259 MachineFunction &MF = *MI->getMF();
1260 const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
1261 // Ok, we're unfolding. Create a temporary register and do the unfold.
1262 Register Reg = MRI->createVirtualRegister(RC);
1263
1265 bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
1266 /*UnfoldLoad=*/true,
1267 /*UnfoldStore=*/false, NewMIs);
1268 (void)Success;
1269 assert(Success &&
1270 "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
1271 "succeeded!");
1272 assert(NewMIs.size() == 2 &&
1273 "Unfolded a load into multiple instructions!");
1274 MachineBasicBlock *MBB = MI->getParent();
1276 MBB->insert(Pos, NewMIs[0]);
1277 MBB->insert(Pos, NewMIs[1]);
1278 // If unfolding produced a load that wasn't loop-invariant or profitable to
1279 // hoist, discard the new instructions and bail.
1280 if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
1281 NewMIs[0]->eraseFromParent();
1282 NewMIs[1]->eraseFromParent();
1283 return nullptr;
1284 }
1285
1286 // Update register pressure for the unfolded instruction.
1287 UpdateRegPressure(NewMIs[1]);
1288
1289 // Otherwise we successfully unfolded a load that we can hoist.
1290
1291 // Update the call site info.
1292 if (MI->shouldUpdateCallSiteInfo())
1294
1295 MI->eraseFromParent();
1296 return NewMIs[0];
1297}
1298
1299/// Initialize the CSE map with instructions that are in the current loop
1300/// preheader that may become duplicates of instructions that are hoisted
1301/// out of the loop.
1302void MachineLICMBase::InitCSEMap(MachineBasicBlock *BB) {
1303 for (MachineInstr &MI : *BB)
1304 CSEMap[MI.getOpcode()].push_back(&MI);
1305}
1306
1307/// Find an instruction amount PrevMIs that is a duplicate of MI.
1308/// Return this instruction if it's found.
1310MachineLICMBase::LookForDuplicate(const MachineInstr *MI,
1311 std::vector<MachineInstr *> &PrevMIs) {
1312 for (MachineInstr *PrevMI : PrevMIs)
1313 if (TII->produceSameValue(*MI, *PrevMI, (PreRegAlloc ? MRI : nullptr)))
1314 return PrevMI;
1315
1316 return nullptr;
1317}
1318
1319/// Given a LICM'ed instruction, look for an instruction on the preheader that
1320/// computes the same value. If it's found, do a RAU on with the definition of
1321/// the existing instruction rather than hoisting the instruction to the
1322/// preheader.
1323bool MachineLICMBase::EliminateCSE(
1325 DenseMap<unsigned, std::vector<MachineInstr *>>::iterator &CI) {
1326 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1327 // the undef property onto uses.
1328 if (CI == CSEMap.end() || MI->isImplicitDef())
1329 return false;
1330
1331 if (MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
1332 LLVM_DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
1333
1334 // Replace virtual registers defined by MI by their counterparts defined
1335 // by Dup.
1337 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
1338 const MachineOperand &MO = MI->getOperand(i);
1339
1340 // Physical registers may not differ here.
1341 assert((!MO.isReg() || MO.getReg() == 0 || !MO.getReg().isPhysical() ||
1342 MO.getReg() == Dup->getOperand(i).getReg()) &&
1343 "Instructions with different phys regs are not identical!");
1344
1345 if (MO.isReg() && MO.isDef() && !MO.getReg().isPhysical())
1346 Defs.push_back(i);
1347 }
1348
1350 for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
1351 unsigned Idx = Defs[i];
1352 Register Reg = MI->getOperand(Idx).getReg();
1353 Register DupReg = Dup->getOperand(Idx).getReg();
1354 OrigRCs.push_back(MRI->getRegClass(DupReg));
1355
1356 if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
1357 // Restore old RCs if more than one defs.
1358 for (unsigned j = 0; j != i; ++j)
1359 MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
1360 return false;
1361 }
1362 }
1363
1364 for (unsigned Idx : Defs) {
1365 Register Reg = MI->getOperand(Idx).getReg();
1366 Register DupReg = Dup->getOperand(Idx).getReg();
1367 MRI->replaceRegWith(Reg, DupReg);
1368 MRI->clearKillFlags(DupReg);
1369 // Clear Dup dead flag if any, we reuse it for Reg.
1370 if (!MRI->use_nodbg_empty(DupReg))
1371 Dup->getOperand(Idx).setIsDead(false);
1372 }
1373
1374 MI->eraseFromParent();
1375 ++NumCSEed;
1376 return true;
1377 }
1378 return false;
1379}
1380
1381/// Return true if the given instruction will be CSE'd if it's hoisted out of
1382/// the loop.
1383bool MachineLICMBase::MayCSE(MachineInstr *MI) {
1384 unsigned Opcode = MI->getOpcode();
1386 CSEMap.find(Opcode);
1387 // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
1388 // the undef property onto uses.
1389 if (CI == CSEMap.end() || MI->isImplicitDef())
1390 return false;
1391
1392 return LookForDuplicate(MI, CI->second) != nullptr;
1393}
1394
1395/// When an instruction is found to use only loop invariant operands
1396/// that are safe to hoist, this instruction is called to do the dirty work.
1397/// It returns true if the instruction is hoisted.
1398bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
1399 MachineBasicBlock *SrcBlock = MI->getParent();
1400
1401 // Disable the instruction hoisting due to block hotness
1403 (DisableHoistingToHotterBlocks == UseBFI::PGO && HasProfileData)) &&
1404 isTgtHotterThanSrc(SrcBlock, Preheader)) {
1405 ++NumNotHoistedDueToHotness;
1406 return false;
1407 }
1408 // First check whether we should hoist this instruction.
1409 if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
1410 // If not, try unfolding a hoistable load.
1411 MI = ExtractHoistableLoad(MI);
1412 if (!MI) return false;
1413 }
1414
1415 // If we have hoisted an instruction that may store, it can only be a constant
1416 // store.
1417 if (MI->mayStore())
1418 NumStoreConst++;
1419
1420 // Now move the instructions to the predecessor, inserting it before any
1421 // terminator instructions.
1422 LLVM_DEBUG({
1423 dbgs() << "Hoisting " << *MI;
1424 if (MI->getParent()->getBasicBlock())
1425 dbgs() << " from " << printMBBReference(*MI->getParent());
1426 if (Preheader->getBasicBlock())
1427 dbgs() << " to " << printMBBReference(*Preheader);
1428 dbgs() << "\n";
1429 });
1430
1431 // If this is the first instruction being hoisted to the preheader,
1432 // initialize the CSE map with potential common expressions.
1433 if (FirstInLoop) {
1434 InitCSEMap(Preheader);
1435 FirstInLoop = false;
1436 }
1437
1438 // Look for opportunity to CSE the hoisted instruction.
1439 unsigned Opcode = MI->getOpcode();
1441 CSEMap.find(Opcode);
1442 if (!EliminateCSE(MI, CI)) {
1443 // Otherwise, splice the instruction to the preheader.
1444 Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
1445
1446 // Since we are moving the instruction out of its basic block, we do not
1447 // retain its debug location. Doing so would degrade the debugging
1448 // experience and adversely affect the accuracy of profiling information.
1449 assert(!MI->isDebugInstr() && "Should not hoist debug inst");
1450 MI->setDebugLoc(DebugLoc());
1451
1452 // Update register pressure for BBs from header to this block.
1453 UpdateBackTraceRegPressure(MI);
1454
1455 // Clear the kill flags of any register this instruction defines,
1456 // since they may need to be live throughout the entire loop
1457 // rather than just live for part of it.
1458 for (MachineOperand &MO : MI->operands())
1459 if (MO.isReg() && MO.isDef() && !MO.isDead())
1460 MRI->clearKillFlags(MO.getReg());
1461
1462 // Add to the CSE map.
1463 if (CI != CSEMap.end())
1464 CI->second.push_back(MI);
1465 else
1466 CSEMap[Opcode].push_back(MI);
1467 }
1468
1469 ++NumHoisted;
1470 Changed = true;
1471
1472 return true;
1473}
1474
1475/// Get the preheader for the current loop, splitting a critical edge if needed.
1476MachineBasicBlock *MachineLICMBase::getCurPreheader() {
1477 // Determine the block to which to hoist instructions. If we can't find a
1478 // suitable loop predecessor, we can't do any hoisting.
1479
1480 // If we've tried to get a preheader and failed, don't try again.
1481 if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
1482 return nullptr;
1483
1484 if (!CurPreheader) {
1485 CurPreheader = CurLoop->getLoopPreheader();
1486 if (!CurPreheader) {
1487 MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
1488 if (!Pred) {
1489 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1490 return nullptr;
1491 }
1492
1493 CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), *this);
1494 if (!CurPreheader) {
1495 CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
1496 return nullptr;
1497 }
1498 }
1499 }
1500 return CurPreheader;
1501}
1502
1503/// Is the target basic block at least "BlockFrequencyRatioThreshold"
1504/// times hotter than the source basic block.
1505bool MachineLICMBase::isTgtHotterThanSrc(MachineBasicBlock *SrcBlock,
1506 MachineBasicBlock *TgtBlock) {
1507 // Parse source and target basic block frequency from MBFI
1508 uint64_t SrcBF = MBFI->getBlockFreq(SrcBlock).getFrequency();
1509 uint64_t DstBF = MBFI->getBlockFreq(TgtBlock).getFrequency();
1510
1511 // Disable the hoisting if source block frequency is zero
1512 if (!SrcBF)
1513 return true;
1514
1515 double Ratio = (double)DstBF / SrcBF;
1516
1517 // Compare the block frequency ratio with the threshold
1518 return Ratio > BlockFrequencyRatioThreshold;
1519}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
basic Basic Alias true
SmallVector< MachineOperand, 4 > Cond
This file implements the BitVector class.
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isExitBlock(BasicBlock *BB, const SmallVectorImpl< BasicBlock * > &ExitBlocks)
Return true if the specified block is in the list.
Definition: LCSSA.cpp:70
#define I(x, y, z)
Definition: MD5.cpp:58
Machine Loop Invariant Code false early machinelicm
static cl::opt< bool > HoistConstStores("hoist-const-stores", cl::desc("Hoist invariant stores"), cl::init(true), cl::Hidden)
static cl::opt< UseBFI > DisableHoistingToHotterBlocks("disable-hoisting-to-hotter-blocks", cl::desc("Disable hoisting instructions to" " hotter blocks"), cl::init(UseBFI::PGO), cl::Hidden, cl::values(clEnumValN(UseBFI::None, "none", "disable the feature"), clEnumValN(UseBFI::PGO, "pgo", "enable the feature when using profile data"), clEnumValN(UseBFI::All, "all", "enable the feature with/wo profile data")))
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI)
Return true if this machine instruction loads from global offset table or constant pool.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI)
Machine Loop Invariant Code false early Early Machine Loop Invariant Code static false bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop)
Test if the given loop is the outer-most loop that has a unique predecessor.
UseBFI
Definition: MachineLICM.cpp:83
Machine Loop Invariant Code Motion
static cl::opt< bool > AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), cl::init(true), cl::Hidden)
static bool InstructionStoresToFI(const MachineInstr *MI, int FI)
Return true if instruction stores to the specified frame.
static bool isCopyFeedingInvariantStore(const MachineInstr &MI, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI)
#define DEBUG_TYPE
Definition: MachineLICM.cpp:59
static cl::opt< bool > HoistCheapInsts("hoist-cheap-insts", cl::desc("MachineLICM should hoist even cheap instructions"), cl::init(false), cl::Hidden)
static bool isInvariantStore(const MachineInstr &MI, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI)
static cl::opt< unsigned > BlockFrequencyRatioThreshold("block-freq-ratio-threshold", cl::desc("Do not hoist instructions if target" "block is N times hotter than the source."), cl::init(100), cl::Hidden)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
TLS Variable Hoist
When an instruction is found to use only loop invariant operands that are safe to hoist,...
This file describes how to lower LLVM code to machine code.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
Definition: BitVector.h:719
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Base class for the actual dominator tree node.
A specialized PseudoSourceValue for holding FixedStack values, which must include a frame index.
bool hasProfileData(bool IncludeSynthetic=false) const
Return true if the function is annotated with profile data.
Definition: Function.h:289
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
bool isAsCheapAsAMove(const MachineInstr &MI) const override
unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:139
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:64
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:33
BlockT * getHeader() const
Definition: LoopInfo.h:105
BlockT * getLoopPredecessor() const
If the given loop's header has exactly one unique predecessor outside the loop, return it.
Definition: LoopInfoImpl.h:211
iterator end() const
Definition: LoopInfo.h:172
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:183
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:188
iterator begin() const
Definition: LoopInfo.h:171
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
MachineBasicBlock * SplitCriticalEdge(MachineBasicBlock *Succ, Pass &P, std::vector< SparseBitVector<> > *LiveInSets=nullptr)
Split the critical edge from this block to the given successor block, and return the newly created bl...
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineDomTreeNode * getNode(MachineBasicBlock *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
bool dominates(const MachineDomTreeNode *A, const MachineDomTreeNode *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
void eraseCallSiteInfo(const MachineInstr *MI)
Following functions update call site info.
Representation of each machine instruction.
Definition: MachineInstr.h:68
iterator begin() const
iterator end() const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
bool isLoopInvariant(MachineInstr &I) const
Returns true if the instruction is loop invariant.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void releaseMemory()
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
Definition: Pass.cpp:102
Special value supplied for machine level alias analysis.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void clear()
Definition: SmallSet.h:216
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Provide an instruction scheduling machine model to CodeGen passes.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
LLVM Value Representation.
Definition: Value.h:74
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:748
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void initializeMachineLICMPass(PassRegistry &)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
char & MachineLICMID
This pass performs loop invariant code motion on machine instructions.
void initializeEarlyMachineLICMPass(PassRegistry &)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Each TargetRegisterClass has a per register weight, and weight limit which must be less than the limi...