LLVM 22.0.0git
RegAllocGreedy.cpp
Go to the documentation of this file.
1//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the RAGreedy function pass for register allocation in
10// optimized builds.
11//
12//===----------------------------------------------------------------------===//
13
14#include "RegAllocGreedy.h"
15#include "AllocationOrder.h"
16#include "InterferenceCache.h"
17#include "RegAllocBase.h"
18#include "SplitKit.h"
19#include "llvm/ADT/ArrayRef.h"
20#include "llvm/ADT/BitVector.h"
21#include "llvm/ADT/IndexedMap.h"
22#include "llvm/ADT/SmallSet.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/Analysis.h"
62#include "llvm/IR/Function.h"
63#include "llvm/IR/LLVMContext.h"
64#include "llvm/Pass.h"
68#include "llvm/Support/Debug.h"
70#include "llvm/Support/Timer.h"
72#include <algorithm>
73#include <cassert>
74#include <cstdint>
75#include <utility>
76
77using namespace llvm;
78
79#define DEBUG_TYPE "regalloc"
80
// Statistics reported under -stats for split/eviction activity.
81STATISTIC(NumGlobalSplits, "Number of split global live ranges");
82STATISTIC(NumLocalSplits, "Number of split local live ranges");
83STATISTIC(NumEvicted, "Number of interferences evicted");
84
// NOTE(review): the `static cl::opt<...> Name(` declarator line of each flag
// below appears to have been lost in extraction (embedded numbering jumps,
// e.g. 85, 93-94, 98, 104, 111, 116, 122, 129, 135 are missing) — restore
// from upstream before compiling; only the argument lists survive here.
86 "split-spill-mode", cl::Hidden,
87 cl::desc("Spill mode for splitting live ranges"),
88 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
89 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
90 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
92
95 cl::desc("Last chance recoloring max depth"),
96 cl::init(5));
97
99 "lcr-max-interf", cl::Hidden,
100 cl::desc("Last chance recoloring maximum number of considered"
101 " interference at a time"),
102 cl::init(8));
103
105 "exhaustive-register-search", cl::NotHidden,
106 cl::desc("Exhaustive Search for registers bypassing the depth "
107 "and interference cutoffs of last chance recoloring"),
108 cl::Hidden);
109
110// FIXME: Find a good default for this flag and remove the flag.
112CSRFirstTimeCost("regalloc-csr-first-time-cost",
113 cl::desc("Cost for first time use of callee-saved register."),
114 cl::init(0), cl::Hidden);
115
117 "grow-region-complexity-budget",
118 cl::desc("growRegion() does not scale with the number of BB edges, so "
119 "limit its budget and bail out once we reach the limit."),
120 cl::init(10000), cl::Hidden);
121
123 "greedy-regclass-priority-trumps-globalness",
124 cl::desc("Change the greedy register allocator's live range priority "
125 "calculation to make the AllocationPriority of the register class "
126 "more important then whether the range is global"),
127 cl::Hidden);
128
130 "greedy-reverse-local-assignment",
131 cl::desc("Reverse allocation order of local live ranges, such that "
132 "shorter local live ranges will tend to be allocated first"),
133 cl::Hidden);
134
136 "split-threshold-for-reg-with-hint",
137 cl::desc("The threshold for splitting a virtual register with a hint, in "
138 "percentage"),
139 cl::init(75), cl::Hidden);
140
// Register the allocator under -regalloc=greedy.
141static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
143
144namespace {
// Legacy pass-manager wrapper around the RAGreedy implementation. All real
// work happens in RAGreedy; this class only plumbs analyses and properties.
// NOTE(review): a member declaration (embedded line 146, presumably
// `RegAllocFilterFunc F;`) appears to have been dropped in extraction —
// runOnMachineFunction below passes an `F` member to RAGreedy; confirm
// against upstream.
145class RAGreedyLegacy : public MachineFunctionPass {
147
148public:
149 RAGreedyLegacy(const RegAllocFilterFunc F = nullptr);
150
151 static char ID;
152 /// Return the pass name.
153 StringRef getPassName() const override { return "Greedy Register Allocator"; }
154
155 /// RAGreedy analysis usage.
156 void getAnalysisUsage(AnalysisUsage &AU) const override;
157 /// Perform register allocation.
158 bool runOnMachineFunction(MachineFunction &mf) override;
159
// The allocator runs after PHI elimination and destroys SSA form.
160 MachineFunctionProperties getRequiredProperties() const override {
161 return MachineFunctionProperties().setNoPHIs();
162 }
163
164 MachineFunctionProperties getClearedProperties() const override {
165 return MachineFunctionProperties().setIsSSA();
166 }
167};
168
169} // end anonymous namespace
170
// Constructor: stores the optional register-filter functor and initializes
// the pass. NOTE(review): the initializer list / body lines (embedded lines
// 172-173) were dropped in extraction — restore from upstream.
171RAGreedyLegacy::RAGreedyLegacy(const RegAllocFilterFunc F)
174}
175
199
// RAGreedy constructor (new pass manager path): caches pointers to all the
// analysis results the allocator needs from the RequiredAnalyses bundle.
// NOTE(review): the signature line (embedded line 200) was dropped in
// extraction — presumably `RAGreedy::RAGreedy(RequiredAnalyses &Analyses,
// RegAllocFilterFunc F)`; confirm against upstream.
201 : RegAllocBase(F) {
202 VRM = Analyses.VRM;
203 LIS = Analyses.LIS;
204 Matrix = Analyses.LRM;
205 Indexes = Analyses.Indexes;
206 MBFI = Analyses.MBFI;
207 DomTree = Analyses.DomTree;
208 Loops = Analyses.Loops;
209 ORE = Analyses.ORE;
210 Bundles = Analyses.Bundles;
211 SpillPlacer = Analyses.SpillPlacer;
212 DebugVars = Analyses.DebugVars;
213 LSS = Analyses.LSS;
214 EvictProvider = Analyses.EvictProvider;
215 PriorityProvider = Analyses.PriorityProvider;
216}
217
// Print this pass in pipeline syntax, e.g. "greedy<all>" or
// "greedy<tile-regs>" depending on the configured filter name.
// NOTE(review): the function's opening signature line (embedded line 218)
// was dropped in extraction — presumably `void RAGreedyPass::printPipeline(`.
219 raw_ostream &OS,
220 function_ref<StringRef(StringRef)> MapClassName2PassName) const {
221 StringRef FilterName = Opts.FilterName.empty() ? "all" : Opts.FilterName;
222 OS << "greedy<" << FilterName << '>';
223}
224
243
// New pass manager entry point: build the analysis bundle, run the greedy
// allocator, and report which analyses survive.
// NOTE(review): the signature lines (embedded 244-245) and the
// `auto PA = getMachineFunctionPassPreservedAnalyses();` line (254) appear
// to have been dropped in extraction — confirm against upstream.
246 MFPropsModifier _(*this, MF);
247
248 RAGreedy::RequiredAnalyses Analyses(MF, MFAM);
249 RAGreedy Impl(Analyses, Opts.Filter);
250
251 bool Changed = Impl.run(MF);
// If nothing was allocated (function filtered out), everything is preserved.
252 if (!Changed)
253 return PreservedAnalyses::all();
// The allocator updates these analyses in place, so they stay valid.
255 PA.preserveSet<CFGAnalyses>();
256 PA.preserve<MachineBlockFrequencyAnalysis>();
257 PA.preserve<LiveIntervalsAnalysis>();
258 PA.preserve<SlotIndexesAnalysis>();
259 PA.preserve<LiveDebugVariablesAnalysis>();
260 PA.preserve<LiveStacksAnalysis>();
261 PA.preserve<VirtRegMapAnalysis>();
262 PA.preserve<LiveRegMatrixAnalysis>();
263 return PA;
264}
265
// Populate the RequiredAnalyses bundle from the legacy pass manager by
// querying each wrapper pass. NOTE(review): the signature line (embedded
// 266) and the `EvictProvider =` / `PriorityProvider =` assignment starts
// (279, 281) were dropped in extraction — confirm against upstream.
267 VRM = &P.getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
268 LIS = &P.getAnalysis<LiveIntervalsWrapperPass>().getLIS();
269 LSS = &P.getAnalysis<LiveStacksWrapperLegacy>().getLS();
270 LRM = &P.getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
271 Indexes = &P.getAnalysis<SlotIndexesWrapperPass>().getSI();
272 MBFI = &P.getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
273 DomTree = &P.getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
274 ORE = &P.getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
275 Loops = &P.getAnalysis<MachineLoopInfoWrapperPass>().getLI();
276 Bundles = &P.getAnalysis<EdgeBundlesWrapperLegacy>().getEdgeBundles();
277 SpillPlacer = &P.getAnalysis<SpillPlacementWrapperLegacy>().getResult();
278 DebugVars = &P.getAnalysis<LiveDebugVariablesWrapperLegacy>().getLDV();
280 &P.getAnalysis<RegAllocEvictionAdvisorAnalysisLegacy>().getProvider();
282 &P.getAnalysis<RegAllocPriorityAdvisorAnalysisLegacy>().getProvider();
283}
284
285bool RAGreedyLegacy::runOnMachineFunction(MachineFunction &MF) {
286 RAGreedy::RequiredAnalyses Analyses(*this);
287 RAGreedy Impl(Analyses, F);
288 return Impl.run(MF);
289}
290
// Pass identification; RAGreedyLegacyID is referenced by the target pass
// pipelines to schedule this allocator.
291char RAGreedyLegacy::ID = 0;
292char &llvm::RAGreedyLegacyID = RAGreedyLegacy::ID;
293
// NOTE(review): several INITIALIZE_PASS_DEPENDENCY lines (embedded 296-298,
// 301-310) and the trailing `false, false)` of INITIALIZE_PASS_END (312)
// were dropped in extraction — restore from upstream.
294INITIALIZE_PASS_BEGIN(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
295 false, false)
299INITIALIZE_PASS_DEPENDENCY(RegisterCoalescerLegacy)
300INITIALIZE_PASS_DEPENDENCY(MachineSchedulerLegacy)
311INITIALIZE_PASS_END(RAGreedyLegacy, "greedy", "Greedy Register Allocator",
313
314#ifndef NDEBUG
// Debug-only names for the live range stages; presumably indexed by the
// LiveRangeStage enum, so the order here must match it — confirm against
// the header.
315const char *const RAGreedy::StageName[] = {
316 "RS_New",
317 "RS_Assign",
318 "RS_Split",
319 "RS_Split2",
320 "RS_Spill",
321 "RS_Done"
322};
323#endif
324
325// Hysteresis to use when comparing floats.
326// This helps stabilize decisions based on float comparisons.
327const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
328
// Factory functions used by targets to create the greedy allocator.
// NOTE(review): both signature lines (embedded 329 and 333, presumably
// `FunctionPass *llvm::createGreedyRegisterAllocator(...)`) were dropped in
// extraction — restore from upstream.
330 return new RAGreedyLegacy();
331}
332
334 return new RAGreedyLegacy(Ftor);
335}
336
// Declare the analyses this pass requires and preserves. The allocator does
// not change the CFG.
// NOTE(review): the body lines listing AU.addRequired<>/addPreserved<> for
// each analysis (embedded lines 339-362) were dropped in extraction —
// restore from upstream; they must mirror RequiredAnalyses::init above.
337void RAGreedyLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
338 AU.setPreservesCFG();
363}
364
365//===----------------------------------------------------------------------===//
366// LiveRangeEdit delegate methods
367//===----------------------------------------------------------------------===//
368
// LiveRangeEdit delegate: called when dead-code elimination wants to erase
// \p VirtReg. Returns true if the interval may be deleted outright (it was
// assigned, so we unassign it first); false if RegAllocBase still owns it
// via the priority queue.
369bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
370 LiveInterval &LI = LIS->getInterval(VirtReg);
371 if (VRM->hasPhys(VirtReg)) {
372 Matrix->unassign(LI);
// NOTE(review): a statement between unassign and return (embedded line 373,
// presumably `aboutToRemoveInterval(LI);`) was dropped in extraction —
// confirm against upstream.
374 return true;
375 }
376 // Unassigned virtreg is probably in the priority queue.
377 // RegAllocBase will erase it after dequeueing.
378 // Nonetheless, clear the live-range so that the debug
379 // dump will show the right state for that VirtReg.
380 LI.clear();
381 return false;
382}
383
// LiveRangeEdit delegate: \p VirtReg is about to shrink, so an existing
// assignment may no longer be optimal; unassign it so it can be
// re-allocated with its new, smaller live range.
384void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
385 if (!VRM->hasPhys(VirtReg))
386 return;
387
388 // Register is assigned, put it back on the queue for reassignment.
389 LiveInterval &LI = LIS->getInterval(VirtReg);
390 Matrix->unassign(LI);
// NOTE(review): the re-enqueue statement (embedded line 391, presumably
// `enqueue(&LI);`) was dropped in extraction — confirm against upstream.
392}
393
// LiveRangeEdit delegate: forward clone notifications to the per-vreg
// bookkeeping in ExtraInfo (stage/cascade propagation happens there).
394void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
395 ExtraInfo->LRE_DidCloneVirtReg(New, Old);
396}
397
// Record that \p New was cloned from \p Old: reset Old to the assignment
// stage and copy its info to New so both get a fresh allocation attempt.
// NOTE(review): the signature line (embedded 398, presumably
// `void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register
// Old) {`) was dropped in extraction — confirm against upstream.
399 // Cloning a register we haven't even heard about yet? Just ignore it.
400 if (!Info.inBounds(Old))
401 return;
402
403 // LRE may clone a virtual register because dead code elimination causes it to
404 // be split into connected components. The new components are much smaller
405 // than the original, so they should get a new chance at being assigned.
406 // The clone starts in the same stage as the parent (copied below).
407 Info[Old].Stage = RS_Assign;
408 Info.grow(New.id());
409 Info[New] = Info[Old];
410}
411
// Drop per-function state: destroy the spiller and clear the cached global
// split candidates. NOTE(review): the signature line (embedded 412,
// presumably `void RAGreedy::releaseMemory() {`) was dropped in extraction.
413 SpillerInstance.reset();
414 GlobalCand.clear();
415}
416
417void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
418
419void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
420 // Prioritize live ranges by size, assigning larger ranges first.
421 // The queue holds (size, reg) pairs.
422 const Register Reg = LI->reg();
423 assert(Reg.isVirtual() && "Can only enqueue virtual registers");
424
425 auto Stage = ExtraInfo->getOrInitStage(Reg);
426 if (Stage == RS_New) {
427 Stage = RS_Assign;
428 ExtraInfo->setStage(Reg, Stage);
429 }
430
431 unsigned Ret = PriorityAdvisor->getPriority(*LI);
432
433 // The virtual register number is a tie breaker for same-sized ranges.
434 // Give lower vreg numbers higher priority to assign them first.
435 CurQueue.push(std::make_pair(Ret, ~Reg.id()));
436}
437
// Compute the allocation priority of \p LI. Larger values are dequeued
// first. The priority packs several signals into one unsigned (see the bit
// layout comment below): stage, hint preference, register-class priority,
// global-vs-local, and size or instruction distance.
438unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
439 const unsigned Size = LI.getSize();
440 const Register Reg = LI.reg();
441 unsigned Prio;
442 LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
443
444 if (Stage == RS_Split) {
445 // Unsplit ranges that couldn't be allocated immediately are deferred until
446 // everything else has been allocated.
447 Prio = Size;
448 } else {
449 // Giant live ranges fall back to the global assignment heuristic, which
450 // prevents excessive spilling in pathological cases.
451 const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
452 bool ForceGlobal = RC.GlobalPriority ||
453 (!ReverseLocalAssignment &&
// NOTE(review): the left operand of this comparison (embedded line 454,
// presumably `(Size / SlotIndex::InstrDist) >`) was dropped in extraction —
// confirm against upstream.
455 (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
456 unsigned GlobalBit = 0;
457
458 if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
459 LIS->intervalIsInOneMBB(LI)) {
460 // Allocate original local ranges in linear instruction order. Since they
461 // are singly defined, this produces optimal coloring in the absence of
462 // global interference and other constraints.
463 if (!ReverseLocalAssignment)
464 Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
465 else {
466 // Allocating bottom up may allow many short LRGs to be assigned first
467 // to one of the cheap registers. This could be much faster for very
468 // large blocks on targets with many physical registers.
469 Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
470 }
471 } else {
472 // Allocate global and split ranges in long->short order. Long ranges that
473 // don't fit should be spilled (or split) ASAP so they don't create
474 // interference. Mark a bit to prioritize global above local ranges.
475 Prio = Size;
476 GlobalBit = 1;
477 }
478
479 // Priority bit layout:
480 // 31 RS_Assign priority
481 // 30 Preference priority
482 // if (RegClassPriorityTrumpsGlobalness)
483 // 29-25 AllocPriority
484 // 24 GlobalBit
485 // else
486 // 29 Global bit
487 // 28-24 AllocPriority
488 // 0-23 Size/Instr distance
489
490 // Clamp the size to fit with the priority masking scheme
491 Prio = std::min(Prio, (unsigned)maxUIntN(24));
492 assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");
493
494 if (RegClassPriorityTrumpsGlobalness)
495 Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
496 else
497 Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
498
499 // Mark a higher bit to prioritize global and local above RS_Split.
500 Prio |= (1u << 31);
501
502 // Boost ranges that have a physical register hint.
503 if (VRM->hasKnownPreference(Reg))
504 Prio |= (1u << 30);
505 }
506
507 return Prio;
508}
509
510unsigned DummyPriorityAdvisor::getPriority(const LiveInterval &LI) const {
511 // Prioritize by virtual register number, lowest first.
512 Register Reg = LI.reg();
513 return ~Reg.virtRegIndex();
514}
515
516const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
517
518const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
519 if (CurQueue.empty())
520 return nullptr;
521 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
522 CurQueue.pop();
523 return LI;
524}
525
526//===----------------------------------------------------------------------===//
527// Direct Assignment
528//===----------------------------------------------------------------------===//
529
530/// tryAssign - Try to assign VirtReg to an available register.
// Returns the chosen physreg, an invalid MCRegister when nothing is
// available, or MCRegister() after splitting around a hint (new vregs are
// appended to NewVRegs in that case).
// NOTE(review): a parameter line (embedded 533, presumably
// `SmallVectorImpl<Register> &NewVRegs,`) was dropped in extraction —
// NewVRegs is used below; confirm against upstream.
531MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
532 AllocationOrder &Order,
534 const SmallVirtRegSet &FixedRegisters) {
// Scan the allocation order for the first interference-free register; a
// hint that is free is taken immediately.
535 MCRegister PhysReg;
536 for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
537 assert(*I);
538 if (!Matrix->checkInterference(VirtReg, *I)) {
539 if (I.isHint())
540 return *I;
541 else
542 PhysReg = *I;
543 }
544 }
545 if (!PhysReg.isValid())
546 return PhysReg;
547
548 // PhysReg is available, but there may be a better choice.
549
550 // If we missed a simple hint, try to cheaply evict interference from the
551 // preferred register.
552 if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
553 if (Order.isHint(Hint)) {
554 MCRegister PhysHint = Hint.asMCReg();
555 LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
556
557 if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
558 FixedRegisters)) {
559 evictInterference(VirtReg, PhysHint, NewVRegs);
560 return PhysHint;
561 }
562
563 // We can also split the virtual register in cold blocks.
564 if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
565 return MCRegister();
566
567 // Record the missed hint, we may be able to recover
568 // at the end if the surrounding allocation changed.
569 SetOfBrokenHints.insert(&VirtReg);
570 }
571
572 // Try to evict interference from a cheaper alternative.
573 uint8_t Cost = RegCosts[PhysReg.id()];
574
575 // Most registers have 0 additional cost.
576 if (!Cost)
577 return PhysReg;
578
// A cheaper register exists in the class; see if eviction can free one for
// less than the extra cost of PhysReg.
579 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
580 << (unsigned)Cost << '\n');
581 MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
582 return CheapReg ? CheapReg : PhysReg;
583}
584
585//===----------------------------------------------------------------------===//
586// Interference eviction
587//===----------------------------------------------------------------------===//
588
// Return true if \p VirtReg, currently assigned to \p FromReg, could be
// moved to some other register in its allocation order without evicting
// anyone. NOTE(review): the signature start (embedded line 589), the
// subquery construction line (593, presumably `LiveIntervalUnion::Query
// SubQ(`), and the range expression of the for loop (599) were dropped in
// extraction — confirm against upstream.
590 MCRegister FromReg) const {
591 auto HasRegUnitInterference = [&](MCRegUnit Unit) {
592 // Instantiate a "subquery", not to be confused with the Queries array.
594 VirtReg, Matrix->getLiveUnions()[static_cast<unsigned>(Unit)]);
595 return SubQ.checkInterference();
596 };
597
598 for (MCRegister Reg :
600 if (Reg == FromReg)
601 continue;
602 // If no units have interference, reassignment is possible.
603 if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
604 LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
605 << printReg(FromReg, TRI) << " to "
606 << printReg(Reg, TRI) << '\n');
607 return true;
608 }
609 }
610 return false;
611}
612
613/// evictInterference - Evict any interferring registers that prevent VirtReg
614/// from being assigned to Physreg. This assumes that canEvictInterference
615/// returned true.
616void RAGreedy::evictInterference(const LiveInterval &VirtReg,
617 MCRegister PhysReg,
618 SmallVectorImpl<Register> &NewVRegs) {
619 // Make sure that VirtReg has a cascade number, and assign that cascade
620 // number to every evicted register. These live ranges can then only be
621 // evicted by a newer cascade, preventing infinite loops.
622 unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
623
624 LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
625 << " interference: Cascade " << Cascade << '\n');
626
627 // Collect all interfering virtregs first.
// NOTE(review): the declaration of the Intfs vector (embedded line 628,
// presumably `SmallVector<const LiveInterval *, 8> Intfs;`) and the line
// binding `IVR` from Q.interferingVRegs() (635) were dropped in extraction
// — confirm against upstream.
629 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
630 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
631 // We usually have the interfering VRegs cached so collectInterferingVRegs()
632 // should be fast; we may need to recalculate it when different physregs
633 // overlap the same register unit so we had different SubRanges queried
634 // against it.
636 Intfs.append(IVR.begin(), IVR.end());
637 }
638
639 // Evict them second. This will invalidate the queries.
640 for (const LiveInterval *Intf : Intfs) {
641 // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
642 if (!VRM->hasPhys(Intf->reg()))
643 continue;
644
645 Matrix->unassign(*Intf);
646 assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
647 VirtReg.isSpillable() < Intf->isSpillable()) &&
648 "Cannot decrease cascade number, illegal eviction");
649 ExtraInfo->setCascade(Intf->reg(), Cascade);
650 ++NumEvicted;
651 NewVRegs.push_back(Intf->reg());
652 }
653}
654
655/// Returns true if the given \p PhysReg is a callee saved register and has not
656/// been used for allocation yet.
// NOTE(review): the signature line (embedded 657, presumably
// `bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {`)
// was dropped in extraction — confirm against upstream.
658 MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
659 if (!CSR)
660 return false;
661
// PhysReg aliases a CSR; it is "unused" if the matrix has never assigned it.
662 return !Matrix->isPhysRegUsed(PhysReg);
663}
664
// Compute how many entries of the allocation order are worth considering
// under \p CostPerUseLimit; returns std::nullopt if no register in the class
// can be cheap enough. NOTE(review): the signature continuation (embedded
// line 666, presumably `RAGreedy::getOrderLimit(const LiveInterval
// &VirtReg,`) was dropped in extraction — confirm against upstream.
665std::optional<unsigned>
667 const AllocationOrder &Order,
668 unsigned CostPerUseLimit) const {
669 unsigned OrderLimit = Order.getOrder().size();
670
671 if (CostPerUseLimit < uint8_t(~0u)) {
672 // Check if any registers in RC are below CostPerUseLimit.
673 const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
674 uint8_t MinCost = RegClassInfo.getMinCost(RC);
675 if (MinCost >= CostPerUseLimit) {
676 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
677 << MinCost << ", no cheaper registers to be found.\n");
678 return std::nullopt;
679 }
680
681 // It is normal for register classes to have a long tail of registers with
682 // the same cost. We don't need to look at them if they're too expensive.
683 if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
684 OrderLimit = RegClassInfo.getLastCostChange(RC);
685 LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
686 << " regs.\n");
687 }
688 }
689 return OrderLimit;
690}
691
// Return true if \p PhysReg is an acceptable candidate under the current
// cost-per-use limit: cheap enough, and not a still-unused callee-saved
// register when the limit forbids paying the CSR first-use cost.
// NOTE(review): the signature start (embedded line 692, presumably
// `bool RAGreedy::canAllocatePhysReg(unsigned CostPerUseLimit,`) and the
// `LLVM_DEBUG(` opener of the debug block (699) were dropped in extraction
// — confirm against upstream.
693 MCRegister PhysReg) const {
694 if (RegCosts[PhysReg.id()] >= CostPerUseLimit)
695 return false;
696 // The first use of a callee-saved register in a function has cost 1.
697 // Don't start using a CSR when the CostPerUseLimit is low.
698 if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
700 dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
701 << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
702 << '\n');
703 return false;
704 }
705 return true;
706}
707
708/// tryEvict - Try to evict all interferences for a physreg.
709/// @param VirtReg Currently unassigned virtual register.
710/// @param Order Physregs to try.
711/// @return Physreg to assign VirtReg, or 0.
// NOTE(review): a parameter line (embedded 714, presumably
// `SmallVectorImpl<Register> &NewVRegs,`) and two lines before the advisor
// call (717-718, presumably a NamedRegionTimer) were dropped in extraction
// — confirm against upstream.
712MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
713 AllocationOrder &Order,
715 uint8_t CostPerUseLimit,
716 const SmallVirtRegSet &FixedRegisters) {
719
// Delegate candidate selection to the eviction advisor, then perform the
// eviction if it found a register.
720 MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
721 VirtReg, Order, CostPerUseLimit, FixedRegisters);
722 if (BestPhys.isValid())
723 evictInterference(VirtReg, BestPhys, NewVRegs);
724 return BestPhys;
725}
726
727//===----------------------------------------------------------------------===//
728// Region Splitting
729//===----------------------------------------------------------------------===//
730
731/// addSplitConstraints - Fill out the SplitConstraints vector based on the
732/// interference pattern in Physreg and its aliases. Add the constraints to
733/// SpillPlacement and return the static cost of this split in Cost, assuming
734/// that all preferences in SplitConstraints are met.
735/// Return false if there are no bundles with positive bias.
// NOTE(review): extraction dropped the BC.Entry assignment (embedded line
// 749), the BorderConstraint values selected in each branch (751-753, 765,
// 768, 785, 788), and part of the MustSpill abort condition (776-777) —
// the `++Ins;` bodies below are missing their preceding `BC.Entry/BC.Exit =`
// statements; restore from upstream before compiling.
736bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
737 BlockFrequency &Cost) {
738 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
739
740 // Reset interference dependent info.
741 SplitConstraints.resize(UseBlocks.size());
742 BlockFrequency StaticCost = BlockFrequency(0);
743 for (unsigned I = 0; I != UseBlocks.size(); ++I) {
744 const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
745 SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
746
747 BC.Number = BI.MBB->getNumber();
748 Intf.moveToBlock(BC.Number);
750 BC.Exit = (BI.LiveOut &&
754 BC.ChangesValue = BI.FirstDef.isValid();
755
756 if (!Intf.hasInterference())
757 continue;
758
759 // Number of spill code instructions to insert.
760 unsigned Ins = 0;
761
762 // Interference for the live-in value.
763 if (BI.LiveIn) {
764 if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
766 ++Ins;
767 } else if (Intf.first() < BI.FirstInstr) {
769 ++Ins;
770 } else if (Intf.first() < BI.LastInstr) {
771 ++Ins;
772 }
773
774 // Abort if the spill cannot be inserted at the MBB' start
775 if (((BC.Entry == SpillPlacement::MustSpill) ||
778 SA->getFirstSplitPoint(BC.Number)))
779 return false;
780 }
781
782 // Interference for the live-out value.
783 if (BI.LiveOut) {
784 if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
786 ++Ins;
787 } else if (Intf.last() > BI.LastInstr) {
789 ++Ins;
790 } else if (Intf.last() > BI.FirstInstr) {
791 ++Ins;
792 }
793 }
794
795 // Accumulate the total frequency of inserted spill code.
796 while (Ins--)
797 StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
798 }
799 Cost = StaticCost;
800
801 // Add constraints for use-blocks. Note that these are the only constraints
802 // that may add a positive bias, it is downhill from here.
803 SpillPlacer->addConstraints(SplitConstraints);
804 return SpillPlacer->scanActiveBundles();
805}
806
807/// addThroughConstraints - Add constraints and links to SpillPlacer from the
808/// live-through blocks in Blocks.
// Blocks with interference become constraints (batched in BCS); blocks
// without become links (batched in TBS); both are flushed to SpillPlacer in
// groups of GroupSize. Returns false to abort the split when a spill cannot
// be placed at a block start.
// NOTE(review): the constraint assignments chosen by the if/else pairs below
// (embedded lines 841, 843, 847, 849, presumably `BCS[B].Entry/Exit =
// SpillPlacement::MustSpill / PrefSpill;`) were dropped in extraction —
// restore from upstream before compiling.
809bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
810 ArrayRef<unsigned> Blocks) {
811 const unsigned GroupSize = 8;
812 SpillPlacement::BlockConstraint BCS[GroupSize];
813 unsigned TBS[GroupSize];
814 unsigned B = 0, T = 0;
815
816 for (unsigned Number : Blocks) {
817 Intf.moveToBlock(Number);
818
819 if (!Intf.hasInterference()) {
820 assert(T < GroupSize && "Array overflow");
821 TBS[T] = Number;
822 if (++T == GroupSize) {
823 SpillPlacer->addLinks(ArrayRef(TBS, T));
824 T = 0;
825 }
826 continue;
827 }
828
829 assert(B < GroupSize && "Array overflow");
830 BCS[B].Number = Number;
831
832 // Abort if the spill cannot be inserted at the MBB' start
833 MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
834 auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
835 if (FirstNonDebugInstr != MBB->end() &&
836 SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
837 SA->getFirstSplitPoint(Number)))
838 return false;
839 // Interference for the live-in value.
840 if (Intf.first() <= Indexes->getMBBStartIdx(Number))
842 else
844
845 // Interference for the live-out value.
846 if (Intf.last() >= SA->getLastSplitPoint(Number))
848 else
850
851 if (++B == GroupSize) {
852 SpillPlacer->addConstraints(ArrayRef(BCS, B));
853 B = 0;
854 }
855 }
856
// Flush any partial groups left over after the loop.
857 SpillPlacer->addConstraints(ArrayRef(BCS, B));
858 SpillPlacer->addLinks(ArrayRef(TBS, T));
859 return true;
860}
861
// Iteratively grow the split region for \p Cand: starting from the bundles
// SpillPlacer currently marks positive, pull in neighbouring live-through
// blocks, feed their constraints back to SpillPlacer, and repeat until a
// fixed point. Returns false if the complexity budget is exhausted or a
// through-constraint aborts the split.
862bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
863 // Keep track of through blocks that have not been added to SpillPlacer.
864 BitVector Todo = SA->getThroughBlocks();
865 SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
866 unsigned AddedTo = 0;
867#ifndef NDEBUG
868 unsigned Visited = 0;
869#endif
870
// Bounded by -grow-region-complexity-budget; see the flag's description.
871 unsigned long Budget = GrowRegionComplexityBudget;
872 while (true) {
873 ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
874 // Find new through blocks in the periphery of PrefRegBundles.
875 for (unsigned Bundle : NewBundles) {
876 // Look at all blocks connected to Bundle in the full graph.
877 ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
878 // Limit compilation time by bailing out after we use all our budget.
879 if (Blocks.size() >= Budget)
880 return false;
881 Budget -= Blocks.size();
882 for (unsigned Block : Blocks) {
883 if (!Todo.test(Block))
884 continue;
885 Todo.reset(Block);
886 // This is a new through block. Add it to SpillPlacer later.
887 ActiveBlocks.push_back(Block);
888#ifndef NDEBUG
889 ++Visited;
890#endif
891 }
892 }
893 // Any new blocks to add?
894 if (ActiveBlocks.size() == AddedTo)
895 break;
896
897 // Compute through constraints from the interference, or assume that all
898 // through blocks prefer spilling when forming compact regions.
899 auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
900 if (Cand.PhysReg) {
901 if (!addThroughConstraints(Cand.Intf, NewBlocks))
902 return false;
903 } else {
904 // Providing that the variable being spilled does not look like a loop
905 // induction variable, which is expensive to spill around and better
906 // pushed into a condition inside the loop if possible, provide a strong
907 // negative bias on through blocks to prevent unwanted liveness on loop
908 // backedges.
909 bool PrefSpill = true;
910 if (SA->looksLikeLoopIV() && NewBlocks.size() >= 2) {
911 // Check that the current bundle is adding a Header + start+end of
912 // loop-internal blocks. If the block is indeed a header, don't make
913 // the NewBlocks as PrefSpill to allow the variable to be live in
914 // Header<->Latch.
915 MachineLoop *L = Loops->getLoopFor(MF->getBlockNumbered(NewBlocks[0]));
916 if (L && L->getHeader()->getNumber() == (int)NewBlocks[0] &&
917 all_of(NewBlocks.drop_front(), [&](unsigned Block) {
918 return L == Loops->getLoopFor(MF->getBlockNumbered(Block));
919 }))
920 PrefSpill = false;
921 }
922 if (PrefSpill)
923 SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
924 }
925 AddedTo = ActiveBlocks.size();
926
927 // Perhaps iterating can enable more bundles?
928 SpillPlacer->iterate();
929 }
930 LLVM_DEBUG(dbgs() << ", v=" << Visited);
931 return true;
932}
933
934/// calcCompactRegion - Compute the set of edge bundles that should be live
935/// when splitting the current live range into compact regions. Compact
936/// regions can be computed without looking at interference. They are the
937/// regions formed by removing all the live-through blocks from the live range.
938///
939/// Returns false if the current live range is already compact, or if the
940/// compact regions would form single block regions anyway.
941bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
942 // Without any through blocks, the live range is already compact.
943 if (!SA->getNumThroughBlocks())
944 return false;
945
946 // Compact regions don't correspond to any physreg.
947 Cand.reset(IntfCache, MCRegister::NoRegister);
948
949 LLVM_DEBUG(dbgs() << "Compact region bundles");
950
951 // Use the spill placer to determine the live bundles. GrowRegion pretends
952 // that all the through blocks have interference when PhysReg is unset.
953 SpillPlacer->prepare(Cand.LiveBundles);
954
955 // The static split cost will be zero since Cand.Intf reports no interference.
956 BlockFrequency Cost;
957 if (!addSplitConstraints(Cand.Intf, Cost)) {
958 LLVM_DEBUG(dbgs() << ", none.\n");
959 return false;
960 }
961
962 if (!growRegion(Cand)) {
963 LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
964 return false;
965 }
966
967 SpillPlacer->finish();
968
969 if (!Cand.LiveBundles.any()) {
970 LLVM_DEBUG(dbgs() << ", none.\n");
971 return false;
972 }
973
974 LLVM_DEBUG({
975 for (int I : Cand.LiveBundles.set_bits())
976 dbgs() << " EB#" << I;
977 dbgs() << ".\n";
978 });
979 return true;
980}
981
982/// calcSpillCost - Compute how expensive it would be to split the live range in
983/// SA around all use blocks instead of forming bundle regions.
984BlockFrequency RAGreedy::calcSpillCost() {
985 BlockFrequency Cost = BlockFrequency(0);
986 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
987 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
988 unsigned Number = BI.MBB->getNumber();
989 // We normally only need one spill instruction - a load or a store.
990 Cost += SpillPlacer->getBlockFrequency(Number);
991
992 // Unless the value is redefined in the block.
993 if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
994 Cost += SpillPlacer->getBlockFrequency(Number);
995 }
996 return Cost;
997}
998
999/// calcGlobalSplitCost - Return the global split cost of following the split
1000/// pattern in LiveBundles. This cost should be added to the local cost of the
1001/// interference pattern in SplitConstraints.
1002///
1003BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
1004 const AllocationOrder &Order) {
1005 BlockFrequency GlobalCost = BlockFrequency(0);
1006 const BitVector &LiveBundles = Cand.LiveBundles;
1007 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1008 for (unsigned I = 0; I != UseBlocks.size(); ++I) {
1009 const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
1010 SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
1011 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
1012 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
1013 unsigned Ins = 0;
1014
1015 Cand.Intf.moveToBlock(BC.Number);
1016
1017 if (BI.LiveIn)
1018 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
1019 if (BI.LiveOut)
1020 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
1021 while (Ins--)
1022 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
1023 }
1024
1025 for (unsigned Number : Cand.ActiveBlocks) {
1026 bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
1027 bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
1028 if (!RegIn && !RegOut)
1029 continue;
1030 if (RegIn && RegOut) {
1031 // We need double spill code if this block has interference.
1032 Cand.Intf.moveToBlock(Number);
1033 if (Cand.Intf.hasInterference()) {
1034 GlobalCost += SpillPlacer->getBlockFrequency(Number);
1035 GlobalCost += SpillPlacer->getBlockFrequency(Number);
1036 }
1037 continue;
1038 }
1039 // live-in / stack-out or stack-in live-out.
1040 GlobalCost += SpillPlacer->getBlockFrequency(Number);
1041 }
1042 return GlobalCost;
1043}
1044
/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
/// must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // IntvIn/IntvOut are the split-editor interval indices selected for the
    // block entry/exit edges; 0 means no register interval was assigned for
    // that edge (the value stays stack-bound there, see the isolated case
    // below). IntfIn/IntfOut bracket the interference inside the block.
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      // Stack-bound on both edges: nothing to insert in this block.
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(LIS, Indexes, "After splitting live range around region",
               &errs());
}
1195
1196MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
1197 AllocationOrder &Order,
1198 SmallVectorImpl<Register> &NewVRegs) {
1199 if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1201 unsigned NumCands = 0;
1202 BlockFrequency SpillCost = calcSpillCost();
1203 BlockFrequency BestCost;
1204
1205 // Check if we can split this live range around a compact region.
1206 bool HasCompact = calcCompactRegion(GlobalCand.front());
1207 if (HasCompact) {
1208 // Yes, keep GlobalCand[0] as the compact region candidate.
1209 NumCands = 1;
1210 BestCost = BlockFrequency::max();
1211 } else {
1212 // No benefit from the compact region, our fallback will be per-block
1213 // splitting. Make sure we find a solution that is cheaper than spilling.
1214 BestCost = SpillCost;
1215 LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = "
1216 << printBlockFreq(*MBFI, BestCost) << '\n');
1217 }
1218
1219 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
1220 NumCands, false /*IgnoreCSR*/);
1221
1222 // No solutions found, fall back to single block splitting.
1223 if (!HasCompact && BestCand == NoCand)
1225
1226 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1227}
1228
/// Evaluate one region-split candidate built around \p PhysReg, storing it in
/// GlobalCand[NumCands] when it is viable.
///
/// \p BestCost, \p NumCands and \p BestCand are all in/out state shared across
/// calls: NumCands is incremented for each kept candidate, and BestCand/
/// BestCost are updated when this candidate is cheaper than the best so far.
/// \return the (possibly updated) index of the best candidate, or NoCand.
unsigned RAGreedy::calculateRegionSplitCostAroundReg(MCRegister PhysReg,
                                                     AllocationOrder &Order,
                                                     BlockFrequency &BestCost,
                                                     unsigned &NumCands,
                                                     unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    // Pick the candidate with the fewest live bundles (never the current
    // best, never the compact region whose PhysReg is 0) as the victim.
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    // Compact the array by moving the last candidate into the vacated slot,
    // fixing up BestCand if it was the one that moved.
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands+1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI)
                    << "\tstatic = " << printBlockFreq(*MBFI, Cost));
  // The static (per-use-block) cost alone already exceeds the best candidate;
  // growing the region can only add cost, so give up early.
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = " << printBlockFreq(*MBFI, Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}
1305
1306unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
1307 AllocationOrder &Order,
1308 BlockFrequency &BestCost,
1309 unsigned &NumCands,
1310 bool IgnoreCSR) {
1311 unsigned BestCand = NoCand;
1312 for (MCRegister PhysReg : Order) {
1313 assert(PhysReg);
1314 if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
1315 continue;
1316
1317 calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
1318 BestCand);
1319 }
1320
1321 return BestCand;
1322}
1323
1324MCRegister RAGreedy::doRegionSplit(const LiveInterval &VirtReg,
1325 unsigned BestCand, bool HasCompact,
1326 SmallVectorImpl<Register> &NewVRegs) {
1327 SmallVector<unsigned, 8> UsedCands;
1328 // Prepare split editor.
1329 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1330 SE->reset(LREdit, SplitSpillMode);
1331
1332 // Assign all edge bundles to the preferred candidate, or NoCand.
1333 BundleCand.assign(Bundles->getNumBundles(), NoCand);
1334
1335 // Assign bundles for the best candidate region.
1336 if (BestCand != NoCand) {
1337 GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1338 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1339 UsedCands.push_back(BestCand);
1340 Cand.IntvIdx = SE->openIntv();
1341 LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1342 << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1343 (void)B;
1344 }
1345 }
1346
1347 // Assign bundles for the compact region.
1348 if (HasCompact) {
1349 GlobalSplitCandidate &Cand = GlobalCand.front();
1350 assert(!Cand.PhysReg && "Compact region has no physreg");
1351 if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1352 UsedCands.push_back(0);
1353 Cand.IntvIdx = SE->openIntv();
1354 LLVM_DEBUG(dbgs() << "Split for compact region in " << B
1355 << " bundles, intv " << Cand.IntvIdx << ".\n");
1356 (void)B;
1357 }
1358 }
1359
1360 splitAroundRegion(LREdit, UsedCands);
1361 return MCRegister();
1362}
1363
// VirtReg has a physical Hint, this function tries to split VirtReg around
// Hint if we can place new COPY instructions in cold blocks.
//
// The split is only attempted when the total frequency of copies to/from Hint
// that the split could delete exceeds the threshold implied by
// SplitThresholdForRegWithHint.
bool RAGreedy::trySplitAroundHintReg(MCRegister Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  // Split the VirtReg may generate COPY instructions in multiple cold basic
  // blocks, and increase code size. So we avoid it when the function is
  // optimized for size.
  if (MF->getFunction().hasOptSize())
    return false;

  // Don't allow repeated splitting as a safe guard against looping.
  if (ExtraInfo->getStage(VirtReg) >= RS_Split2)
    return false;

  BlockFrequency Cost = BlockFrequency(0);
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // Hint register, and after split, they can be deleted.

  // FIXME: This is miscounting the costs with subregisters. In particular, this
  // should support recognizing SplitKit formed copy bundles instead of direct
  // copy instructions, which will appear in the same block.
  for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
    const MachineInstr &Instr = *Opnd.getParent();
    if (!Instr.isCopy() || Opnd.isImplicit())
      continue;

    // Look for the other end of the copy. In a COPY, operand 0 is the def and
    // operand 1 the use, so the other end is at index IsDef (0 or 1).
    const bool IsDef = Opnd.isDef();
    const MachineOperand &OtherOpnd = Instr.getOperand(IsDef);
    Register OtherReg = OtherOpnd.getReg();
    assert(Reg == Opnd.getReg());
    // Self-copies cannot be deleted by assigning Hint; skip them.
    if (OtherReg == Reg)
      continue;

    unsigned SubReg = Opnd.getSubReg();
    unsigned OtherSubReg = OtherOpnd.getSubReg();
    // Mismatched subregister indices on both sides; not a copy the split can
    // simplify.
    if (SubReg && OtherSubReg && SubReg != OtherSubReg)
      continue;

    // Check if VirtReg interferes with OtherReg after this COPY instruction.
    if (Opnd.readsReg()) {
      SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();

      if (SubReg) {
        LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
        if (IsDef)
          Mask = ~Mask;

        // Any relevant lane still live right after the copy means assigning
        // Hint could not delete it — skip.
        if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
              return (S.LaneMask & Mask).any() && S.liveAt(Index);
            })) {
          continue;
        }
      } else {
        if (VirtReg.liveAt(Index))
          continue;
      }
    }

    // Count the copy's block frequency when the other end is (or is assigned
    // to) the hinted physical register (adjusted for subregister copies).
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    MCRegister ThisHint = SubReg ? TRI->getSubReg(Hint, SubReg) : Hint;
    if (OtherPhysReg == ThisHint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
  if (Cost == BlockFrequency(0))
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
  return true;
}
1451
1452//===----------------------------------------------------------------------===//
1453// Per-Block Splitting
1454//===----------------------------------------------------------------------===//
1455
1456/// tryBlockSplit - Split a global live range around every block with uses. This
1457/// creates a lot of local live ranges, that will be split by tryLocalSplit if
1458/// they don't allocate.
1459MCRegister RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1460 AllocationOrder &Order,
1461 SmallVectorImpl<Register> &NewVRegs) {
1462 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1463 Register Reg = VirtReg.reg();
1464 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1465 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1466 SE->reset(LREdit, SplitSpillMode);
1467 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1468 for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1469 if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1470 SE->splitSingleBlock(BI);
1471 }
1472 // No blocks were split.
1473 if (LREdit.empty())
1474 return MCRegister();
1475
1476 // We did split for some blocks.
1477 SmallVector<unsigned, 8> IntvMap;
1478 SE->finish(&IntvMap);
1479
1480 // Tell LiveDebugVariables about the new ranges.
1481 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1482
1483 // Sort out the new intervals created by splitting. The remainder interval
1484 // goes straight to spilling, the new local ranges get to stay RS_New.
1485 for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1486 const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1487 if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1488 ExtraInfo->setStage(LI, RS_Spill);
1489 }
1490
1491 if (VerifyEnabled)
1492 MF->verify(LIS, Indexes, "After splitting live range around basic blocks",
1493 &errs());
1494 return MCRegister();
1495}
1496
1497//===----------------------------------------------------------------------===//
1498// Per-Instruction Splitting
1499//===----------------------------------------------------------------------===//
1500
1501/// Get the number of allocatable registers that match the constraints of \p Reg
1502/// on \p MI and that are also in \p SuperRC.
1504 const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1506 const RegisterClassInfo &RCI) {
1507 assert(SuperRC && "Invalid register class");
1508
1509 const TargetRegisterClass *ConstrainedRC =
1510 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1511 /* ExploreBundle */ true);
1512 if (!ConstrainedRC)
1513 return 0;
1514 return RCI.getNumAllocatableRegs(ConstrainedRC);
1515}
1516
1518 const TargetRegisterInfo &TRI,
1519 const MachineInstr &FirstMI,
1520 Register Reg) {
1521 LaneBitmask Mask;
1523 (void)AnalyzeVirtRegInBundle(const_cast<MachineInstr &>(FirstMI), Reg, &Ops);
1524
1525 for (auto [MI, OpIdx] : Ops) {
1526 const MachineOperand &MO = MI->getOperand(OpIdx);
1527 assert(MO.isReg() && MO.getReg() == Reg);
1528 unsigned SubReg = MO.getSubReg();
1529 if (SubReg == 0 && MO.isUse()) {
1530 if (MO.isUndef())
1531 continue;
1532 return MRI.getMaxLaneMaskForVReg(Reg);
1533 }
1534
1535 LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
1536 if (MO.isDef()) {
1537 if (!MO.isUndef())
1538 Mask |= ~SubRegMask;
1539 } else
1540 Mask |= SubRegMask;
1541 }
1542
1543 return Mask;
1544}
1545
1546/// Return true if \p MI at \P Use reads a subset of the lanes live in \p
1547/// VirtReg.
1549 const MachineInstr *MI, const LiveInterval &VirtReg,
1551 const TargetInstrInfo *TII) {
1552 // Early check the common case. Beware of the semi-formed bundles SplitKit
1553 // creates by setting the bundle flag on copies without a matching BUNDLE.
1554
1555 auto DestSrc = TII->isCopyInstr(*MI);
1556 if (DestSrc && !MI->isBundled() &&
1557 DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
1558 return false;
1559
1560 // FIXME: We're only considering uses, but should be consider defs too?
1561 LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
1562
1563 LaneBitmask LiveAtMask;
1564 for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
1565 if (S.liveAt(Use))
1566 LiveAtMask |= S.LaneMask;
1567 }
1568
1569 // If the live lanes aren't different from the lanes used by the instruction,
1570 // this doesn't help.
1571 return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
1572}
1573
/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
MCRegister RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    // No larger super-class to relax into; splitting is only useful for
    // relaxing subrange (lane) constraints, so require subranges.
    if (!VirtReg.hasSubRanges())
      return MCRegister();
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  // Nothing to split around when there is at most one use.
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return MCRegister();

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    // Create a tiny interval covering just this single use.
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return MCRegister();
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return MCRegister();
}
1646
1647//===----------------------------------------------------------------------===//
1648// Local Splitting
1649//===----------------------------------------------------------------------===//
1650
1651/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1652/// in order to use PhysReg between two entries in SA->UseSlots.
1653///
1654/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
1655///
1656void RAGreedy::calcGapWeights(MCRegister PhysReg,
1657 SmallVectorImpl<float> &GapWeight) {
1658 assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1659 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1660 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1661 const unsigned NumGaps = Uses.size()-1;
1662
1663 // Start and end points for the interference check.
1664 SlotIndex StartIdx =
1666 SlotIndex StopIdx =
1668
1669 GapWeight.assign(NumGaps, 0.0f);
1670
1671 // Add interference from each overlapping register.
1672 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1673 if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
1674 .checkInterference())
1675 continue;
1676
1677 // We know that VirtReg is a continuous interval from FirstInstr to
1678 // LastInstr, so we don't need InterferenceQuery.
1679 //
1680 // Interference that overlaps an instruction is counted in both gaps
1681 // surrounding the instruction. The exception is interference before
1682 // StartIdx and after StopIdx.
1683 //
1685 Matrix->getLiveUnions()[static_cast<unsigned>(Unit)].find(StartIdx);
1686 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1687 // Skip the gaps before IntI.
1688 while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1689 if (++Gap == NumGaps)
1690 break;
1691 if (Gap == NumGaps)
1692 break;
1693
1694 // Update the gaps covered by IntI.
1695 const float weight = IntI.value()->weight();
1696 for (; Gap != NumGaps; ++Gap) {
1697 GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1698 if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1699 break;
1700 }
1701 if (Gap == NumGaps)
1702 break;
1703 }
1704 }
1705
1706 // Add fixed interference.
1707 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1708 const LiveRange &LR = LIS->getRegUnit(Unit);
1709 LiveRange::const_iterator I = LR.find(StartIdx);
1711
1712 // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
1713 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1714 while (Uses[Gap+1].getBoundaryIndex() < I->start)
1715 if (++Gap == NumGaps)
1716 break;
1717 if (Gap == NumGaps)
1718 break;
1719
1720 for (; Gap != NumGaps; ++Gap) {
1721 GapWeight[Gap] = huge_valf;
1722 if (Uses[Gap+1].getBaseIndex() >= I->end)
1723 break;
1724 }
1725 if (Gap == NumGaps)
1726 break;
1727 }
1728 }
1729}
1730
1731/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1732/// basic block.
1733///
1734MCRegister RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1735 AllocationOrder &Order,
1736 SmallVectorImpl<Register> &NewVRegs) {
1737 // TODO: the function currently only handles a single UseBlock; it should be
1738 // possible to generalize.
1739 if (SA->getUseBlocks().size() != 1)
1740 return MCRegister();
1741
1742 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1743
1744 // Note that it is possible to have an interval that is live-in or live-out
1745 // while only covering a single block - A phi-def can use undef values from
1746 // predecessors, and the block could be a single-block loop.
1747 // We don't bother doing anything clever about such a case, we simply assume
1748 // that the interval is continuous from FirstInstr to LastInstr. We should
1749 // make sure that we don't do anything illegal to such an interval, though.
1750
1751 ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1752 if (Uses.size() <= 2)
1753 return MCRegister();
1754 const unsigned NumGaps = Uses.size()-1;
1755
1756 LLVM_DEBUG({
1757 dbgs() << "tryLocalSplit: ";
1758 for (const auto &Use : Uses)
1759 dbgs() << ' ' << Use;
1760 dbgs() << '\n';
1761 });
1762
1763 // If VirtReg is live across any register mask operands, compute a list of
1764 // gaps with register masks.
1765 SmallVector<unsigned, 8> RegMaskGaps;
1766 if (Matrix->checkRegMaskInterference(VirtReg)) {
1767 // Get regmask slots for the whole block.
1768 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1769 LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1770 // Constrain to VirtReg's live range.
1771 unsigned RI =
1772 llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1773 unsigned RE = RMS.size();
1774 for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1775 // Look for Uses[I] <= RMS <= Uses[I + 1].
1777 if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1778 continue;
1779 // Skip a regmask on the same instruction as the last use. It doesn't
1780 // overlap the live range.
1781 if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1782 break;
1783 LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1784 << Uses[I + 1]);
1785 RegMaskGaps.push_back(I);
1786 // Advance ri to the next gap. A regmask on one of the uses counts in
1787 // both gaps.
1788 while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1789 ++RI;
1790 }
1791 LLVM_DEBUG(dbgs() << '\n');
1792 }
1793
1794 // Since we allow local split results to be split again, there is a risk of
1795 // creating infinite loops. It is tempting to require that the new live
1796 // ranges have less instructions than the original. That would guarantee
1797 // convergence, but it is too strict. A live range with 3 instructions can be
1798 // split 2+3 (including the COPY), and we want to allow that.
1799 //
1800 // Instead we use these rules:
1801 //
1802 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1803 // noop split, of course).
1804 // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1805 // the new ranges must have fewer instructions than before the split.
1806 // 3. New ranges with the same number of instructions are marked RS_Split2,
1807 // smaller ranges are marked RS_New.
1808 //
1809 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1810 // excessive splitting and infinite loops.
1811 //
1812 bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1813
1814 // Best split candidate.
1815 unsigned BestBefore = NumGaps;
1816 unsigned BestAfter = 0;
1817 float BestDiff = 0;
1818
1819 const float blockFreq =
1820 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1821 (1.0f / MBFI->getEntryFreq().getFrequency());
1822 SmallVector<float, 8> GapWeight;
1823
1824 for (MCRegister PhysReg : Order) {
1825 assert(PhysReg);
1826 // Keep track of the largest spill weight that would need to be evicted in
1827 // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1828 calcGapWeights(PhysReg, GapWeight);
1829
1830 // Remove any gaps with regmask clobbers.
1831 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1832 for (unsigned Gap : RegMaskGaps)
1833 GapWeight[Gap] = huge_valf;
1834
1835 // Try to find the best sequence of gaps to close.
1836 // The new spill weight must be larger than any gap interference.
1837
1838 // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1839 unsigned SplitBefore = 0, SplitAfter = 1;
1840
1841 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1842 // It is the spill weight that needs to be evicted.
1843 float MaxGap = GapWeight[0];
1844
1845 while (true) {
1846 // Live before/after split?
1847 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1848 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1849
1850 LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1851 << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1852
1853 // Stop before the interval gets so big we wouldn't be making progress.
1854 if (!LiveBefore && !LiveAfter) {
1855 LLVM_DEBUG(dbgs() << " all\n");
1856 break;
1857 }
1858 // Should the interval be extended or shrunk?
1859 bool Shrink = true;
1860
1861 // How many gaps would the new range have?
1862 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1863
1864 // Legally, without causing looping?
1865 bool Legal = !ProgressRequired || NewGaps < NumGaps;
1866
1867 if (Legal && MaxGap < huge_valf) {
1868 // Estimate the new spill weight. Each instruction reads or writes the
1869 // register. Conservatively assume there are no read-modify-write
1870 // instructions.
1871 //
1872 // Try to guess the size of the new interval.
1873 const float EstWeight = normalizeSpillWeight(
1874 blockFreq * (NewGaps + 1),
1875 Uses[SplitBefore].distance(Uses[SplitAfter]) +
1876 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1877 1);
1878 // Would this split be possible to allocate?
1879 // Never allocate all gaps, we wouldn't be making progress.
1880 LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1881 if (EstWeight * Hysteresis >= MaxGap) {
1882 Shrink = false;
1883 float Diff = EstWeight - MaxGap;
1884 if (Diff > BestDiff) {
1885 LLVM_DEBUG(dbgs() << " (best)");
1886 BestDiff = Hysteresis * Diff;
1887 BestBefore = SplitBefore;
1888 BestAfter = SplitAfter;
1889 }
1890 }
1891 }
1892
1893 // Try to shrink.
1894 if (Shrink) {
1895 if (++SplitBefore < SplitAfter) {
1896 LLVM_DEBUG(dbgs() << " shrink\n");
1897 // Recompute the max when necessary.
1898 if (GapWeight[SplitBefore - 1] >= MaxGap) {
1899 MaxGap = GapWeight[SplitBefore];
1900 for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1901 MaxGap = std::max(MaxGap, GapWeight[I]);
1902 }
1903 continue;
1904 }
1905 MaxGap = 0;
1906 }
1907
1908 // Try to extend the interval.
1909 if (SplitAfter >= NumGaps) {
1910 LLVM_DEBUG(dbgs() << " end\n");
1911 break;
1912 }
1913
1914 LLVM_DEBUG(dbgs() << " extend\n");
1915 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1916 }
1917 }
1918
1919 // Didn't find any candidates?
1920 if (BestBefore == NumGaps)
1921 return MCRegister();
1922
1923 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1924 << Uses[BestAfter] << ", " << BestDiff << ", "
1925 << (BestAfter - BestBefore + 1) << " instrs\n");
1926
1927 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1928 SE->reset(LREdit);
1929
1930 SE->openIntv();
1931 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1932 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1933 SE->useIntv(SegStart, SegStop);
1934 SmallVector<unsigned, 8> IntvMap;
1935 SE->finish(&IntvMap);
1936 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1937 // If the new range has the same number of instructions as before, mark it as
1938 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1939 // leave the new intervals as RS_New so they can compete.
1940 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1941 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1942 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1943 if (NewGaps >= NumGaps) {
1944 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1945 assert(!ProgressRequired && "Didn't make progress when it was required.");
1946 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1947 if (IntvMap[I] == 1) {
1948 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1949 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1950 }
1951 LLVM_DEBUG(dbgs() << '\n');
1952 }
1953 ++NumLocalSplits;
1954
1955 return MCRegister();
1956}
1957
1958//===----------------------------------------------------------------------===//
1959// Live Range Splitting
1960//===----------------------------------------------------------------------===//
1961
1962/// trySplit - Try to split VirtReg or one of its interferences, making it
1963/// assignable.
1964/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
1965MCRegister RAGreedy::trySplit(const LiveInterval &VirtReg,
1966 AllocationOrder &Order,
1967 SmallVectorImpl<Register> &NewVRegs,
1968 const SmallVirtRegSet &FixedRegisters) {
1969 // Ranges must be Split2 or less.
1970 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1971 return MCRegister();
1972
1973 // Local intervals are handled separately.
1974 if (LIS->intervalIsInOneMBB(VirtReg)) {
1975 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1977 SA->analyze(&VirtReg);
1978 MCRegister PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1979 if (PhysReg || !NewVRegs.empty())
1980 return PhysReg;
1981 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1982 }
1983
1984 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1986
1987 SA->analyze(&VirtReg);
1988
1989 // First try to split around a region spanning multiple blocks. RS_Split2
1990 // ranges already made dubious progress with region splitting, so they go
1991 // straight to single block splitting.
1992 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1993 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1994 if (PhysReg || !NewVRegs.empty())
1995 return PhysReg;
1996 }
1997
1998 // Then isolate blocks.
1999 return tryBlockSplit(VirtReg, Order, NewVRegs);
2000}
2001
2002//===----------------------------------------------------------------------===//
2003// Last Chance Recoloring
2004//===----------------------------------------------------------------------===//
2005
2006/// Return true if \p reg has any tied def operand.
2008 for (const MachineOperand &MO : MRI->def_operands(reg))
2009 if (MO.isTied())
2010 return true;
2011
2012 return false;
2013}
2014
2015/// Return true if the existing assignment of \p Intf overlaps, but is not the
2016/// same, as \p PhysReg.
2018 const VirtRegMap &VRM,
2019 MCRegister PhysReg,
2020 const LiveInterval &Intf) {
2021 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
2022 if (PhysReg == AssignedReg)
2023 return false;
2024 return TRI.regsOverlap(PhysReg, AssignedReg);
2025}
2026
2027/// mayRecolorAllInterferences - Check if the virtual registers that
2028/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
2029/// recolored to free \p PhysReg.
2030/// When true is returned, \p RecoloringCandidates has been augmented with all
2031/// the live intervals that need to be recolored in order to free \p PhysReg
2032/// for \p VirtReg.
2033/// \p FixedRegisters contains all the virtual registers that cannot be
2034/// recolored.
2035bool RAGreedy::mayRecolorAllInterferences(
2036 MCRegister PhysReg, const LiveInterval &VirtReg,
2037 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
2038 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
2039
2040 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
2041 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
2042 // If there is LastChanceRecoloringMaxInterference or more interferences,
2043 // chances are one would not be recolorable.
2047 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
2048 CutOffInfo |= CO_Interf;
2049 return false;
2050 }
2051 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
2052 // If Intf is done and sits on the same register class as VirtReg, it
2053 // would not be recolorable as it is in the same state as
2054 // VirtReg. However there are at least two exceptions.
2055 //
2056 // If VirtReg has tied defs and Intf doesn't, then
2057 // there is still a point in examining if it can be recolorable.
2058 //
2059 // Additionally, if the register class has overlapping tuple members, it
2060 // may still be recolorable using a different tuple. This is more likely
2061 // if the existing assignment aliases with the candidate.
2062 //
2063 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
2064 MRI->getRegClass(Intf->reg()) == CurRC &&
2065 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
2066 !(hasTiedDef(MRI, VirtReg.reg()) &&
2067 !hasTiedDef(MRI, Intf->reg()))) ||
2068 FixedRegisters.count(Intf->reg())) {
2069 LLVM_DEBUG(
2070 dbgs() << "Early abort: the interference is not recolorable.\n");
2071 return false;
2072 }
2073 RecoloringCandidates.insert(Intf);
2074 }
2075 }
2076 return true;
2077}
2078
2079/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
2080/// its interferences.
2081/// Last chance recoloring chooses a color for \p VirtReg and recolors every
2082/// virtual register that was using it. The recoloring process may recursively
2083/// use the last chance recoloring. Therefore, when a virtual register has been
2084/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
2085/// be last-chance-recolored again during this recoloring "session".
2086/// E.g.,
2087/// Let
2088/// vA can use {R1, R2 }
2089/// vB can use { R2, R3}
2090/// vC can use {R1 }
2091/// Where vA, vB, and vC cannot be split anymore (they are reloads for
2092/// instance) and they all interfere.
2093///
2094/// vA is assigned R1
2095/// vB is assigned R2
2096/// vC tries to evict vA but vA is already done.
2097/// Regular register allocation fails.
2098///
2099/// Last chance recoloring kicks in:
2100/// vC does as if vA was evicted => vC uses R1.
2101/// vC is marked as fixed.
2102/// vA needs to find a color.
2103/// None are available.
2104/// vA cannot evict vC: vC is a fixed virtual register now.
2105/// vA does as if vB was evicted => vA uses R2.
2106/// vB needs to find a color.
2107/// R3 is available.
2108/// Recoloring => vC = R1, vA = R2, vB = R3
2109///
2110/// \p Order defines the preferred allocation order for \p VirtReg.
2111/// \p NewRegs will contain any new virtual register that have been created
2112/// (split, spill) during the process and that must be assigned.
2113/// \p FixedRegisters contains all the virtual registers that cannot be
2114/// recolored.
2115///
2116/// \p RecolorStack tracks the original assignments of successfully recolored
2117/// registers.
2118///
2119/// \p Depth gives the current depth of the last chance recoloring.
2120/// \return a physical register that can be used for VirtReg or ~0u if none
2121/// exists.
2122MCRegister RAGreedy::tryLastChanceRecoloring(
2123 const LiveInterval &VirtReg, AllocationOrder &Order,
2124 SmallVectorImpl<Register> &NewVRegs, SmallVirtRegSet &FixedRegisters,
2125 RecoloringStack &RecolorStack, unsigned Depth) {
2126 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
2127 return ~0u;
2128
2129 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
2130
2131 const ssize_t EntryStackSize = RecolorStack.size();
2132
2133 // Ranges must be Done.
2134 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
2135 "Last chance recoloring should really be last chance");
2136 // Set the max depth to LastChanceRecoloringMaxDepth.
2137 // We may want to reconsider that if we end up with a too large search space
2138 // for target with hundreds of registers.
2139 // Indeed, in that case we may want to cut the search space earlier.
2141 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
2142 CutOffInfo |= CO_Depth;
2143 return ~0u;
2144 }
2145
2146 // Set of Live intervals that will need to be recolored.
2147 SmallLISet RecoloringCandidates;
2148
2149 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
2150 // this recoloring "session".
2151 assert(!FixedRegisters.count(VirtReg.reg()));
2152 FixedRegisters.insert(VirtReg.reg());
2153 SmallVector<Register, 4> CurrentNewVRegs;
2154
2155 for (MCRegister PhysReg : Order) {
2156 assert(PhysReg.isValid());
2157 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
2158 << printReg(PhysReg, TRI) << '\n');
2159 RecoloringCandidates.clear();
2160 CurrentNewVRegs.clear();
2161
2162 // It is only possible to recolor virtual register interference.
2163 if (Matrix->checkInterference(VirtReg, PhysReg) >
2165 LLVM_DEBUG(
2166 dbgs() << "Some interferences are not with virtual registers.\n");
2167
2168 continue;
2169 }
2170
2171 // Early give up on this PhysReg if it is obvious we cannot recolor all
2172 // the interferences.
2173 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
2174 FixedRegisters)) {
2175 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
2176 continue;
2177 }
2178
2179 // RecoloringCandidates contains all the virtual registers that interfere
2180 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
2181 // recoloring and perform the actual recoloring.
2182 PQueue RecoloringQueue;
2183 for (const LiveInterval *RC : RecoloringCandidates) {
2184 Register ItVirtReg = RC->reg();
2185 enqueue(RecoloringQueue, RC);
2186 assert(VRM->hasPhys(ItVirtReg) &&
2187 "Interferences are supposed to be with allocated variables");
2188
2189 // Record the current allocation.
2190 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
2191
2192 // unset the related struct.
2193 Matrix->unassign(*RC);
2194 }
2195
2196 // Do as if VirtReg was assigned to PhysReg so that the underlying
2197 // recoloring has the right information about the interferes and
2198 // available colors.
2199 Matrix->assign(VirtReg, PhysReg);
2200
2201 // VirtReg may be deleted during tryRecoloringCandidates, save a copy.
2202 Register ThisVirtReg = VirtReg.reg();
2203
2204 // Save the current recoloring state.
2205 // If we cannot recolor all the interferences, we will have to start again
2206 // at this point for the next physical register.
2207 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2208 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2209 FixedRegisters, RecolorStack, Depth)) {
2210 // Push the queued vregs into the main queue.
2211 llvm::append_range(NewVRegs, CurrentNewVRegs);
2212 // Do not mess up with the global assignment process.
2213 // I.e., VirtReg must be unassigned.
2214 if (VRM->hasPhys(ThisVirtReg)) {
2215 Matrix->unassign(VirtReg);
2216 return PhysReg;
2217 }
2218
2219 // It is possible VirtReg will be deleted during tryRecoloringCandidates.
2220 LLVM_DEBUG(dbgs() << "tryRecoloringCandidates deleted a fixed register "
2221 << printReg(ThisVirtReg) << '\n');
2222 FixedRegisters.erase(ThisVirtReg);
2223 return MCRegister();
2224 }
2225
2226 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2227 << printReg(PhysReg, TRI) << '\n');
2228
2229 // The recoloring attempt failed, undo the changes.
2230 FixedRegisters = SaveFixedRegisters;
2231 Matrix->unassign(VirtReg);
2232
2233 // For a newly created vreg which is also in RecoloringCandidates,
2234 // don't add it to NewVRegs because its physical register will be restored
2235 // below. Other vregs in CurrentNewVRegs are created by calling
2236 // selectOrSplit and should be added into NewVRegs.
2237 for (Register R : CurrentNewVRegs) {
2238 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2239 continue;
2240 NewVRegs.push_back(R);
2241 }
2242
2243 // Roll back our unsuccessful recoloring. Also roll back any successful
2244 // recolorings in any recursive recoloring attempts, since it's possible
2245 // they would have introduced conflicts with assignments we will be
2246 // restoring further up the stack. Perform all unassignments prior to
2247 // reassigning, since sub-recolorings may have conflicted with the registers
2248 // we are going to restore to their original assignments.
2249 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2250 const LiveInterval *LI;
2251 MCRegister PhysReg;
2252 std::tie(LI, PhysReg) = RecolorStack[I];
2253
2254 if (VRM->hasPhys(LI->reg()))
2255 Matrix->unassign(*LI);
2256 }
2257
2258 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2259 const LiveInterval *LI;
2260 MCRegister PhysReg;
2261 std::tie(LI, PhysReg) = RecolorStack[I];
2262 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2263 Matrix->assign(*LI, PhysReg);
2264 }
2265
2266 // Pop the stack of recoloring attempts.
2267 RecolorStack.resize(EntryStackSize);
2268 }
2269
2270 // Last chance recoloring did not worked either, give up.
2271 return ~0u;
2272}
2273
2274/// tryRecoloringCandidates - Try to assign a new color to every register
2275/// in \RecoloringQueue.
2276/// \p NewRegs will contain any new virtual register created during the
2277/// recoloring process.
2278/// \p FixedRegisters[in/out] contains all the registers that have been
2279/// recolored.
2280/// \return true if all virtual registers in RecoloringQueue were successfully
2281/// recolored, false otherwise.
2282bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2283 SmallVectorImpl<Register> &NewVRegs,
2284 SmallVirtRegSet &FixedRegisters,
2285 RecoloringStack &RecolorStack,
2286 unsigned Depth) {
2287 while (!RecoloringQueue.empty()) {
2288 const LiveInterval *LI = dequeue(RecoloringQueue);
2289 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2290 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2291 RecolorStack, Depth + 1);
2292 // When splitting happens, the live-range may actually be empty.
2293 // In that case, this is okay to continue the recoloring even
2294 // if we did not find an alternative color for it. Indeed,
2295 // there will not be anything to color for LI in the end.
2296 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2297 return false;
2298
2299 if (!PhysReg) {
2300 assert(LI->empty() && "Only empty live-range do not require a register");
2301 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2302 << " succeeded. Empty LI.\n");
2303 continue;
2304 }
2305 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2306 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2307
2308 Matrix->assign(*LI, PhysReg);
2309 FixedRegisters.insert(LI->reg());
2310 }
2311 return true;
2312}
2313
2314//===----------------------------------------------------------------------===//
2315// Main Entry Point
2316//===----------------------------------------------------------------------===//
2317
2319 SmallVectorImpl<Register> &NewVRegs) {
2320 CutOffInfo = CO_None;
2321 LLVMContext &Ctx = MF->getFunction().getContext();
2322 SmallVirtRegSet FixedRegisters;
2323 RecoloringStack RecolorStack;
2324 MCRegister Reg =
2325 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2326 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2327 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2328 if (CutOffEncountered == CO_Depth)
2329 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2330 "reached. Use -fexhaustive-register-search to skip "
2331 "cutoffs");
2332 else if (CutOffEncountered == CO_Interf)
2333 Ctx.emitError("register allocation failed: maximum interference for "
2334 "recoloring reached. Use -fexhaustive-register-search "
2335 "to skip cutoffs");
2336 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2337 Ctx.emitError("register allocation failed: maximum interference and "
2338 "depth for recoloring reached. Use "
2339 "-fexhaustive-register-search to skip cutoffs");
2340 }
2341 return Reg;
2342}
2343
2344/// Using a CSR for the first time has a cost because it causes push|pop
2345/// to be added to prologue|epilogue. Splitting a cold section of the live
2346/// range can have lower cost than using the CSR for the first time;
2347/// Spilling a live range in the cold path can have lower cost than using
2348/// the CSR for the first time. Returns the physical register if we decide
2349/// to use the CSR; otherwise return MCRegister().
2350MCRegister RAGreedy::tryAssignCSRFirstTime(
2351 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2352 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2353 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2354 // We choose spill over using the CSR for the first time if the spill cost
2355 // is lower than CSRCost.
2356 SA->analyze(&VirtReg);
2357 if (calcSpillCost() >= CSRCost)
2358 return PhysReg;
2359
2360 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2361 // we will not use a callee-saved register in tryEvict.
2362 CostPerUseLimit = 1;
2363 return MCRegister();
2364 }
2365 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2366 // We choose pre-splitting over using the CSR for the first time if
2367 // the cost of splitting is lower than CSRCost.
2368 SA->analyze(&VirtReg);
2369 unsigned NumCands = 0;
2370 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2371 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2372 NumCands, true /*IgnoreCSR*/);
2373 if (BestCand == NoCand)
2374 // Use the CSR if we can't find a region split below CSRCost.
2375 return PhysReg;
2376
2377 // Perform the actual pre-splitting.
2378 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2379 return MCRegister();
2380 }
2381 return PhysReg;
2382}
2383
2385 // Do not keep invalid information around.
2386 SetOfBrokenHints.remove(&LI);
2387}
2388
2389void RAGreedy::initializeCSRCost() {
2390 // We use the command-line option if it is explicitly set, otherwise use the
2391 // larger one out of the command-line option and the value reported by TRI.
2392 CSRCost = BlockFrequency(
2393 CSRFirstTimeCost.getNumOccurrences()
2395 : std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2396 if (!CSRCost.getFrequency())
2397 return;
2398
2399 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2400 uint64_t ActualEntry = MBFI->getEntryFreq().getFrequency();
2401 if (!ActualEntry) {
2402 CSRCost = BlockFrequency(0);
2403 return;
2404 }
2405 uint64_t FixedEntry = 1 << 14;
2406 if (ActualEntry < FixedEntry)
2407 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2408 else if (ActualEntry <= UINT32_MAX)
2409 // Invert the fraction and divide.
2410 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2411 else
2412 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2413 CSRCost =
2414 BlockFrequency(CSRCost.getFrequency() * (ActualEntry / FixedEntry));
2415}
2416
2417/// Collect the hint info for \p Reg.
2418/// The results are stored into \p Out.
2419/// \p Out is not cleared before being populated.
2420void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2421 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2422
2423 for (const MachineOperand &Opnd : MRI->reg_nodbg_operands(Reg)) {
2424 const MachineInstr &Instr = *Opnd.getParent();
2425 if (!Instr.isCopy() || Opnd.isImplicit())
2426 continue;
2427
2428 // Look for the other end of the copy.
2429 const MachineOperand &OtherOpnd = Instr.getOperand(Opnd.isDef());
2430 Register OtherReg = OtherOpnd.getReg();
2431 if (OtherReg == Reg)
2432 continue;
2433 unsigned OtherSubReg = OtherOpnd.getSubReg();
2434 unsigned SubReg = Opnd.getSubReg();
2435
2436 // Get the current assignment.
2437 MCRegister OtherPhysReg;
2438 if (OtherReg.isPhysical()) {
2439 if (OtherSubReg)
2440 OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, OtherSubReg, RC);
2441 else if (SubReg)
2442 OtherPhysReg = TRI->getMatchingSuperReg(OtherReg, SubReg, RC);
2443 else
2444 OtherPhysReg = OtherReg;
2445 } else {
2446 OtherPhysReg = VRM->getPhys(OtherReg);
2447 // TODO: Should find matching superregister, but applying this in the
2448 // non-hint case currently causes regressions
2449
2450 if (SubReg && OtherSubReg && SubReg != OtherSubReg)
2451 continue;
2452 }
2453
2454 // Push the collected information.
2455 if (OtherPhysReg) {
2456 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2457 OtherPhysReg));
2458 }
2459 }
2460}
2461
2462/// Using the given \p List, compute the cost of the broken hints if
2463/// \p PhysReg was used.
2464/// \return The cost of \p List for \p PhysReg.
2465BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2466 MCRegister PhysReg) {
2467 BlockFrequency Cost = BlockFrequency(0);
2468 for (const HintInfo &Info : List) {
2469 if (Info.PhysReg != PhysReg)
2470 Cost += Info.Freq;
2471 }
2472 return Cost;
2473}
2474
2475/// Using the register assigned to \p VirtReg, try to recolor
2476/// all the live ranges that are copy-related with \p VirtReg.
2477/// The recoloring is then propagated to all the live-ranges that have
2478/// been recolored and so on, until no more copies can be coalesced or
2479/// it is not profitable.
2480/// For a given live range, profitability is determined by the sum of the
2481/// frequencies of the non-identity copies it would introduce with the old
2482/// and new register.
2483void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2484 // We have a broken hint, check if it is possible to fix it by
2485 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2486 // some register and PhysReg may be available for the other live-ranges.
2487 HintsInfo Info;
2488 Register Reg = VirtReg.reg();
2489 MCRegister PhysReg = VRM->getPhys(Reg);
2490 // Start the recoloring algorithm from the input live-interval, then
2491 // it will propagate to the ones that are copy-related with it.
2492 SmallSet<Register, 4> Visited = {Reg};
2493 SmallVector<Register, 2> RecoloringCandidates = {Reg};
2494
2495 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2496 << '(' << printReg(PhysReg, TRI) << ")\n");
2497
2498 do {
2499 Reg = RecoloringCandidates.pop_back_val();
2500
2501 MCRegister CurrPhys = VRM->getPhys(Reg);
2502
2503 // This may be a skipped register.
2504 if (!CurrPhys) {
2506 "We have an unallocated variable which should have been handled");
2507 continue;
2508 }
2509
2510 // Get the live interval mapped with this virtual register to be able
2511 // to check for the interference with the new color.
2512 LiveInterval &LI = LIS->getInterval(Reg);
2513 // Check that the new color matches the register class constraints and
2514 // that it is free for this live range.
2515 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2516 Matrix->checkInterference(LI, PhysReg)))
2517 continue;
2518
2519 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2520 << ") is recolorable.\n");
2521
2522 // Gather the hint info.
2523 Info.clear();
2524 collectHintInfo(Reg, Info);
2525 // Check if recoloring the live-range will increase the cost of the
2526 // non-identity copies.
2527 if (CurrPhys != PhysReg) {
2528 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2529 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2530 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2531 LLVM_DEBUG(dbgs() << "Old Cost: " << printBlockFreq(*MBFI, OldCopiesCost)
2532 << "\nNew Cost: "
2533 << printBlockFreq(*MBFI, NewCopiesCost) << '\n');
2534 if (OldCopiesCost < NewCopiesCost) {
2535 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2536 continue;
2537 }
2538 // At this point, the cost is either cheaper or equal. If it is
2539 // equal, we consider this is profitable because it may expose
2540 // more recoloring opportunities.
2541 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2542 // Recolor the live-range.
2543 Matrix->unassign(LI);
2544 Matrix->assign(LI, PhysReg);
2545 }
2546 // Push all copy-related live-ranges to keep reconciling the broken
2547 // hints.
2548 for (const HintInfo &HI : Info) {
2549 // We cannot recolor physical register.
2550 if (HI.Reg.isVirtual() && Visited.insert(HI.Reg).second)
2551 RecoloringCandidates.push_back(HI.Reg);
2552 }
2553 } while (!RecoloringCandidates.empty());
2554}
2555
2556/// Try to recolor broken hints.
2557/// Broken hints may be repaired by recoloring when an evicted variable
2558/// freed up a register for a larger live-range.
2559/// Consider the following example:
2560/// BB1:
2561/// a =
2562/// b =
2563/// BB2:
2564/// ...
2565/// = b
2566/// = a
2567/// Let us assume b gets split:
2568/// BB1:
2569/// a =
2570/// b =
2571/// BB2:
2572/// c = b
2573/// ...
2574/// d = c
2575/// = d
2576/// = a
2577/// Because of how the allocation work, b, c, and d may be assigned different
2578/// colors. Now, if a gets evicted later:
2579/// BB1:
2580/// a =
2581/// st a, SpillSlot
2582/// b =
2583/// BB2:
2584/// c = b
2585/// ...
2586/// d = c
2587/// = d
2588/// e = ld SpillSlot
2589/// = e
2590/// This is likely that we can assign the same register for b, c, and d,
2591/// getting rid of 2 copies.
2592void RAGreedy::tryHintsRecoloring() {
2593 for (const LiveInterval *LI : SetOfBrokenHints) {
2594 assert(LI->reg().isVirtual() &&
2595 "Recoloring is possible only for virtual registers");
2596 // Some dead defs may be around (e.g., because of debug uses).
2597 // Ignore those.
2598 if (!VRM->hasPhys(LI->reg()))
2599 continue;
2600 tryHintRecoloring(*LI);
2601 }
2602}
2603
2604MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2605 SmallVectorImpl<Register> &NewVRegs,
2606 SmallVirtRegSet &FixedRegisters,
2607 RecoloringStack &RecolorStack,
2608 unsigned Depth) {
2609 uint8_t CostPerUseLimit = uint8_t(~0u);
2610 // First try assigning a free register.
2611 auto Order =
2613 if (MCRegister PhysReg =
2614 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2615 // When NewVRegs is not empty, we may have made decisions such as evicting
2616 // a virtual register, go with the earlier decisions and use the physical
2617 // register.
2618 if (CSRCost.getFrequency() &&
2619 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2620 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2621 CostPerUseLimit, NewVRegs);
2622 if (CSRReg || !NewVRegs.empty())
2623 // Return now if we decide to use a CSR or create new vregs due to
2624 // pre-splitting.
2625 return CSRReg;
2626 } else
2627 return PhysReg;
2628 }
2629 // Non empty NewVRegs means VirtReg has been split.
2630 if (!NewVRegs.empty())
2631 return MCRegister();
2632
2633 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2634 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2635 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2636
2637 // Try to evict a less worthy live range, but only for ranges from the primary
2638 // queue. The RS_Split ranges already failed to do this, and they should not
2639 // get a second chance until they have been split.
2640 if (Stage != RS_Split) {
2641 if (MCRegister PhysReg =
2642 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2643 FixedRegisters)) {
2644 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2645 // If VirtReg has a hint and that hint is broken record this
2646 // virtual register as a recoloring candidate for broken hint.
2647 // Indeed, since we evicted a variable in its neighborhood it is
2648 // likely we can at least partially recolor some of the
2649 // copy-related live-ranges.
2650 if (Hint && Hint != PhysReg)
2651 SetOfBrokenHints.insert(&VirtReg);
2652 return PhysReg;
2653 }
2654 }
2655
2656 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2657
2658 // The first time we see a live range, don't try to split or spill.
2659 // Wait until the second time, when all smaller ranges have been allocated.
2660 // This gives a better picture of the interference to split around.
2661 if (Stage < RS_Split) {
2662 ExtraInfo->setStage(VirtReg, RS_Split);
2663 LLVM_DEBUG(dbgs() << "wait for second round\n");
2664 NewVRegs.push_back(VirtReg.reg());
2665 return MCRegister();
2666 }
2667
2668 if (Stage < RS_Spill && !VirtReg.empty()) {
2669 // Try splitting VirtReg or interferences.
2670 unsigned NewVRegSizeBefore = NewVRegs.size();
2671 MCRegister PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2672 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2673 return PhysReg;
2674 }
2675
2676 // If we couldn't allocate a register from spilling, there is probably some
2677 // invalid inline assembly. The base class will report it.
2678 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2679 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2680 RecolorStack, Depth);
2681 }
2682
2683 // Finally spill VirtReg itself.
2684 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2686 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2687 spiller().spill(LRE, &Order);
2688 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2689
2690 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2691 // the new regs are kept in LDV (still mapping to the old register), until
2692 // we rewrite spilled locations in LDV at a later stage.
2693 for (Register r : spiller().getSpilledRegs())
2694 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2695 for (Register r : spiller().getReplacedRegs())
2696 DebugVars->splitRegister(r, LRE.regs(), *LIS);
2697
2698 if (VerifyEnabled)
2699 MF->verify(LIS, Indexes, "After spilling", &errs());
2700
2701 // The live virtual register requesting allocation was spilled, so tell
2702 // the caller not to allocate anything during this round.
2703 return MCRegister();
2704}
2705
2706void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2707 using namespace ore;
2708 if (Spills) {
2709 R << NV("NumSpills", Spills) << " spills ";
2710 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2711 }
2712 if (FoldedSpills) {
2713 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2714 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2715 << " total folded spills cost ";
2716 }
2717 if (Reloads) {
2718 R << NV("NumReloads", Reloads) << " reloads ";
2719 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2720 }
2721 if (FoldedReloads) {
2722 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2723 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2724 << " total folded reloads cost ";
2725 }
2726 if (ZeroCostFoldedReloads)
2727 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2728 << " zero cost folded reloads ";
2729 if (Copies) {
2730 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2731 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2732 }
2733}
2734
2735RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2736 RAGreedyStats Stats;
2737 const MachineFrameInfo &MFI = MF->getFrameInfo();
2738 int FI;
2739
2740 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2742 A->getPseudoValue())->getFrameIndex());
2743 };
2744 auto isPatchpointInstr = [](const MachineInstr &MI) {
2745 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2746 MI.getOpcode() == TargetOpcode::STACKMAP ||
2747 MI.getOpcode() == TargetOpcode::STATEPOINT;
2748 };
2749 for (MachineInstr &MI : MBB) {
2750 auto DestSrc = TII->isCopyInstr(MI);
2751 if (DestSrc) {
2752 const MachineOperand &Dest = *DestSrc->Destination;
2753 const MachineOperand &Src = *DestSrc->Source;
2754 Register SrcReg = Src.getReg();
2755 Register DestReg = Dest.getReg();
2756 // Only count `COPY`s with a virtual register as source or destination.
2757 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2758 if (SrcReg.isVirtual()) {
2759 SrcReg = VRM->getPhys(SrcReg);
2760 if (SrcReg && Src.getSubReg())
2761 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2762 }
2763 if (DestReg.isVirtual()) {
2764 DestReg = VRM->getPhys(DestReg);
2765 if (DestReg && Dest.getSubReg())
2766 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2767 }
2768 if (SrcReg != DestReg)
2769 ++Stats.Copies;
2770 }
2771 continue;
2772 }
2773
2774 SmallVector<const MachineMemOperand *, 2> Accesses;
2775 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2776 ++Stats.Reloads;
2777 continue;
2778 }
2779 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2780 ++Stats.Spills;
2781 continue;
2782 }
2783 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2784 llvm::any_of(Accesses, isSpillSlotAccess)) {
2785 if (!isPatchpointInstr(MI)) {
2786 Stats.FoldedReloads += Accesses.size();
2787 continue;
2788 }
2789 // For statepoint there may be folded and zero cost folded stack reloads.
2790 std::pair<unsigned, unsigned> NonZeroCostRange =
2791 TII->getPatchpointUnfoldableRange(MI);
2792 SmallSet<unsigned, 16> FoldedReloads;
2793 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2794 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2795 MachineOperand &MO = MI.getOperand(Idx);
2796 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2797 continue;
2798 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2799 FoldedReloads.insert(MO.getIndex());
2800 else
2801 ZeroCostFoldedReloads.insert(MO.getIndex());
2802 }
2803 // If stack slot is used in folded reload it is not zero cost then.
2804 for (unsigned Slot : FoldedReloads)
2805 ZeroCostFoldedReloads.erase(Slot);
2806 Stats.FoldedReloads += FoldedReloads.size();
2807 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2808 continue;
2809 }
2810 Accesses.clear();
2811 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2812 llvm::any_of(Accesses, isSpillSlotAccess)) {
2813 Stats.FoldedSpills += Accesses.size();
2814 }
2815 }
2816 // Set cost of collected statistic by multiplication to relative frequency of
2817 // this basic block.
2818 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2819 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2820 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2821 Stats.SpillsCost = RelFreq * Stats.Spills;
2822 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2823 Stats.CopiesCost = RelFreq * Stats.Copies;
2824 return Stats;
2825}
2826
2827RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2828 RAGreedyStats Stats;
2829
2830 // Sum up the spill and reloads in subloops.
2831 for (MachineLoop *SubLoop : *L)
2832 Stats.add(reportStats(SubLoop));
2833
2834 for (MachineBasicBlock *MBB : L->getBlocks())
2835 // Handle blocks that were not included in subloops.
2836 if (Loops->getLoopFor(MBB) == L)
2837 Stats.add(computeStats(*MBB));
2838
2839 if (!Stats.isEmpty()) {
2840 using namespace ore;
2841
2842 ORE->emit([&]() {
2843 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2844 L->getStartLoc(), L->getHeader());
2845 Stats.report(R);
2846 R << "generated in loop";
2847 return R;
2848 });
2849 }
2850 return Stats;
2851}
2852
2853void RAGreedy::reportStats() {
2854 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2855 return;
2856 RAGreedyStats Stats;
2857 for (MachineLoop *L : *Loops)
2858 Stats.add(reportStats(L));
2859 // Process non-loop blocks.
2860 for (MachineBasicBlock &MBB : *MF)
2861 if (!Loops->getLoopFor(&MBB))
2862 Stats.add(computeStats(MBB));
2863 if (!Stats.isEmpty()) {
2864 using namespace ore;
2865
2866 ORE->emit([&]() {
2867 DebugLoc Loc;
2868 if (auto *SP = MF->getFunction().getSubprogram())
2869 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2870 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2871 &MF->front());
2872 Stats.report(R);
2873 R << "generated in function";
2874 return R;
2875 });
2876 }
2877}
2878
2879bool RAGreedy::hasVirtRegAlloc() {
2880 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2882 if (MRI->reg_nodbg_empty(Reg))
2883 continue;
2885 return true;
2886 }
2887
2888 return false;
2889}
2890
2892 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2893 << "********** Function: " << mf.getName() << '\n');
2894
2895 MF = &mf;
2896 TII = MF->getSubtarget().getInstrInfo();
2897
2898 if (VerifyEnabled)
2899 MF->verify(LIS, Indexes, "Before greedy register allocator", &errs());
2900
2901 RegAllocBase::init(*this->VRM, *this->LIS, *this->Matrix);
2902
2903 // Early return if there is no virtual register to be allocated to a
2904 // physical register.
2905 if (!hasVirtRegAlloc())
2906 return false;
2907
2908 // Renumber to get accurate and consistent results from
2909 // SlotIndexes::getApproxInstrDistance.
2910 Indexes->packIndexes();
2911
2912 initializeCSRCost();
2913
2914 RegCosts = TRI->getRegisterCosts(*MF);
2915 RegClassPriorityTrumpsGlobalness =
2916 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2918 : TRI->regClassPriorityTrumpsGlobalness(*MF);
2919
2920 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2922 : TRI->reverseLocalAssignment();
2923
2924 ExtraInfo.emplace();
2925
2926 EvictAdvisor = EvictProvider->getAdvisor(*MF, *this, MBFI, Loops);
2927 PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);
2928
2929 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2930 SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
2931 *VRM, *VRAI, Matrix));
2932
2933 VRAI->calculateSpillWeightsAndHints();
2934
2935 LLVM_DEBUG(LIS->dump());
2936
2937 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2938 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2939
2940 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2941 GlobalCand.resize(32); // This will grow as needed.
2942 SetOfBrokenHints.clear();
2943
2945 tryHintsRecoloring();
2946
2947 if (VerifyEnabled)
2948 MF->verify(LIS, Indexes, "Before post optimization", &errs());
2950 reportStats();
2951
2952 releaseMemory();
2953 return true;
2954}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
DXIL Forward Handle Accesses
#define DEBUG_TYPE
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
This file implements an indexed map.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Live Register Matrix
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static bool hasTiedDef(MachineRegisterInfo *MRI, Register reg)
Return true if reg has any tied def operand.
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentage"), cl::init(75), cl::Hidden)
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &FirstMI, Register Reg)
Remove Loads Into Fake Uses
SI Lower i1 Copies
SI optimize exec mask operations pre RA
SI Optimize VGPR LiveRange
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName) const
PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM)
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
Iterator end() const
static AllocationOrder create(Register VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator begin() const
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
static BlockFrequency max()
Returns the maximum possible frequency, the saturation value.
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
LiveSegments::iterator SegmentIter
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
Register reg() const
bool isSpillable() const
isSpillable - Can this interval be spilled?
bool hasSubRanges() const
Returns true if subregister liveness information is available.
LLVM_ABI unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
LiveInterval & getInterval(Register Reg)
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
Segments::const_iterator const_iterator
bool liveAt(SlotIndex index) const
bool empty() const
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
LLVM_ABI iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
@ IK_VirtReg
Virtual register interference.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
constexpr unsigned id() const
Definition MCRegister.h:82
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
An RAII based helper class to modify MachineFunctionProperties when running pass.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
LLVM_ABI iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Representation of each machine instruction.
bool isImplicitDef() const
Analysis pass that exposes the MachineLoopInfo for a machine function.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void LRE_DidCloneVirtReg(Register New, Register Old)
bool run(MachineFunction &mf)
Perform register allocation.
Spiller & spiller() override
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
RAGreedy(RequiredAnalyses &Analyses, const RegAllocFilterFunc F=nullptr)
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase(const RegAllocFilterFunc F=nullptr)
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in De...
const TargetRegisterInfo * TRI
LiveIntervals * LIS
static const char TimerGroupName[]
static const char TimerGroupDescription[]
LiveRegMatrix * Matrix
virtual void postOptimization()
VirtRegMap * VRM
RegisterClassInfo RegClassInfo
MachineRegisterInfo * MRI
bool shouldAllocateRegister(Register Reg)
Get whether a given register should be allocated.
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
A MachineFunction analysis for fetching the Eviction Advisor.
Common provider for legacy and new pass managers.
const TargetRegisterInfo *const TRI
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
const RegisterClassInfo & RegClassInfo
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet...
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
Common provider for getting the priority advisor and logging rewards.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
unsigned virtRegIndex() const
Convert a virtual register number to a 0-based index.
Definition Register.h:87
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
SlotIndex - An opaque wrapper around machine indexes.
Definition SlotIndexes.h:66
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
@ InstrDist
The default distance between instructions as returned by distance().
bool isValid() const
Returns true if this is a valid index.
SlotIndex getBoundaryIndex() const
Returns the boundary index for associated with this index.
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction ...
SlotIndexes pass.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
@ MustSpill
A register is impossible, variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order=nullptr)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition SplitKit.h:263
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other interva...
Definition SplitKit.h:286
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition SplitKit.h:298
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition SplitKit.h:293
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heurist...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
MCRegister getPhys(Register virtReg) const
returns the physical register mapped to the specified virtual register
Definition VirtRegMap.h:91
bool hasPhys(Register virtReg) const
returns true if the specified virtual register is mapped to a physical register
Definition VirtRegMap.h:87
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
Pass manager infrastructure for declaring and invalidating analyses.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
std::function< bool(const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, const Register Reg)> RegAllocFilterFunc
Filter function for register classes during regalloc.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition MathExtras.h:207
InstructionCost Cost
SmallSet< Register, 16 > SmallVirtRegSet
LLVM_ABI FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized buil...
LLVM_ABI void initializeRAGreedyLegacyPass(PassRegistry &)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
LLVM_ABI bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this b...
Definition IRReader.cpp:26
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI VirtRegInfo AnalyzeVirtRegInBundle(MachineInstr &MI, Register Reg, SmallVectorImpl< std::pair< MachineInstr *, unsigned > > *Ops=nullptr)
AnalyzeVirtRegInBundle - Analyze how the current instruction or bundle uses a virtual register.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1994
Spiller * createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix=nullptr)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1867
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI Printable printBlockFreq(const BlockFrequencyInfo &BFI, BlockFrequency Freq)
Print the block frequency Freq relative to the current functions entry frequency.
LLVM_ABI char & RAGreedyLegacyID
Greedy register allocator.
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:867
MachineBlockFrequencyInfo * MBFI
RegAllocEvictionAdvisorProvider * EvictProvider
MachineOptimizationRemarkEmitter * ORE
RegAllocPriorityAdvisorProvider * PriorityProvider
constexpr bool any() const
Definition LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition Timer.h:175
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition SplitKit.h:126
MachineBasicBlock * MBB
Definition SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition SplitKit.h:123