RegAllocGreedy.cpp (LLVM 18.0.0git)
//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

87 "split-spill-mode", cl::Hidden,
88 cl::desc("Spill mode for splitting live ranges"),
89 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
90 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
91 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
93
96 cl::desc("Last chance recoloring max depth"),
97 cl::init(5));
98
100 "lcr-max-interf", cl::Hidden,
101 cl::desc("Last chance recoloring maximum number of considered"
102 " interference at a time"),
103 cl::init(8));
104
106 "exhaustive-register-search", cl::NotHidden,
107 cl::desc("Exhaustive Search for registers bypassing the depth "
108 "and interference cutoffs of last chance recoloring"),
109 cl::Hidden);
110
112 "enable-deferred-spilling", cl::Hidden,
113 cl::desc("Instead of spilling a variable right away, defer the actual "
114 "code insertion to the end of the allocation. That way the "
115 "allocator might still find a suitable coloring for this "
116 "variable because of other evicted variables."),
117 cl::init(false));
118
119// FIXME: Find a good default for this flag and remove the flag.
121CSRFirstTimeCost("regalloc-csr-first-time-cost",
122 cl::desc("Cost for first time use of callee-saved register."),
123 cl::init(0), cl::Hidden);
124
126 "grow-region-complexity-budget",
127 cl::desc("growRegion() does not scale with the number of BB edges, so "
128 "limit its budget and bail out once we reach the limit."),
129 cl::init(10000), cl::Hidden);
130
132 "greedy-regclass-priority-trumps-globalness",
133 cl::desc("Change the greedy register allocator's live range priority "
134 "calculation to make the AllocationPriority of the register class "
135 "more important then whether the range is global"),
136 cl::Hidden);
137
139 "greedy-reverse-local-assignment",
140 cl::desc("Reverse allocation order of local live ranges, such that "
141 "shorter local live ranges will tend to be allocated first"),
142 cl::Hidden);
143
145 "split-threshold-for-reg-with-hint",
146 cl::desc("The threshold for splitting a virtual register with a hint, in "
147 "percentate"),
148 cl::init(75), cl::Hidden);
149
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                      "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                    "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

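// For example, in tryLocalSplit below a new split candidate only displaces the
// incumbent when EstWeight * Hysteresis >= MaxGap, i.e. it must win by roughly
// 2% rather than by float rounding noise.
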
FunctionPass *llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  // ... (the long list of addRequired<>/addPreserved<> calls is elided) ...
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned
  // instead of staying at the same stage as the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
  CurQueue.push(std::make_pair(Ret, ~Reg));
}

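// Note that ~Reg is pushed, not Reg: in the max-heap the bitwise complement
// makes lower-numbered vregs compare larger, so e.g. %10 dequeues before %11
// when both carry the same priority. dequeue() restores the number with ~.
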
unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order in which they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31    RS_Assign priority
    // 30    Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24    GlobalBit
    // else
    //   29    Global bit
    //   28-24 AllocPriority
    // 0-23  Size/Instr distance

    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}

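// Worked example of the packing above (RegClassPriorityTrumpsGlobalness
// false): a global range with Size = 100, AllocationPriority = 0 and no hint
// gets Prio = 100 | 1u << 29 | 1u << 31 = 0xA0000064, so, hints aside, it
// outranks every same-class local range (GlobalBit clear) regardless of the
// local range's size.
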
const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
// Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }

      // We can also split the virtual register in cold blocks.
      if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order))
        return 0;

      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

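// To summarize the order of attempts above: take an interference-free hint
// immediately, otherwise remember a free register; then try evicting or
// splitting to recover a missed hint; and only pay a nonzero register cost
// after tryEvict fails to find a cheaper alternative.
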
//===----------------------------------------------------------------------===//
// Interference eviction
//===----------------------------------------------------------------------===//

bool RAGreedy::canReassign(const LiveInterval &VirtReg,
                           MCRegister FromReg) const {
  auto HasRegUnitInterference = [&](MCRegUnit Unit) {
    // Instantiate a "subquery", not to be confused with the Queries array.
    LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
    return SubQ.checkInterference();
  };

  for (MCRegister Reg :
       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
    if (Reg == FromReg)
      continue;
    // If no units have interference, reassignment is possible.
    if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
      LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                        << printReg(FromReg, TRI) << " to "
                        << printReg(Reg, TRI) << '\n');
      return true;
    }
  }
  return false;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast; we may need to recalculate when different physregs
    // overlap the same register unit, because different SubRanges were queried
    // against it.
    const auto &IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

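// Illustration of the cascade invariant checked by the assert above: if a
// range carrying cascade 3 evicts %b, %b is stamped with cascade 3 and can
// subsequently be displaced only by a strictly newer (higher) cascade, so an
// eviction chain can never revisit a register and loop forever.
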
/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}

std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

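// E.g. with CostPerUseLimit == 1, an allocation order ending in a run of
// equally expensive registers is cut at getLastCostChange(), so the tail is
// never scanned once its cost is known to be over the limit.
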
bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
// Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in Physreg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

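// The GroupSize = 8 arrays above just batch the SpillPlacer updates:
// constraints (blocks with interference) and links (blocks without) are
// flushed whenever a group fills, and the partial groups are flushed after
// the loop.
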
bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

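// Each positive bundle can fan out to all of its blocks, so the loop above
// charges Blocks.size() against GrowRegionComplexityBudget (default 10000)
// and gives up on the candidate rather than scale with the number of edges.
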
/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

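// Example: a block where the value is live-in, live-out, and also redefined
// pays getBlockFrequency() twice (reload plus store), while a plain use block
// pays it once for its single load or store.
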
/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

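// In the second loop above, a live-through block assigned to the register on
// both sides pays the block frequency twice only when it has interference
// (spill plus reload around it), while a register/stack transition at either
// boundary pays it once for the copy.
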
/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safe guard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned
RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            unsigned &BestCand) {
  // Discard bad candidates before we run out of interference cache cursors.
  // This will only affect register classes with a lot of registers (>32).
  if (NumCands == IntfCache.getMaxCursors()) {
    unsigned WorstCount = ~0u;
    unsigned Worst = 0;
    for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
      if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
        continue;
      unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
      if (Count < WorstCount) {
        Worst = CandIndex;
        WorstCount = Count;
      }
    }
    --NumCands;
    GlobalCand[Worst] = GlobalCand[NumCands];
    if (BestCand == NumCands)
      BestCand = Worst;
  }

  if (GlobalCand.size() <= NumCands)
    GlobalCand.resize(NumCands+1);
  GlobalSplitCandidate &Cand = GlobalCand[NumCands];
  Cand.reset(IntfCache, PhysReg);

  SpillPlacer->prepare(Cand.LiveBundles);
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
    return BestCand;
  }
  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
             MBFI->printBlockFreq(dbgs(), Cost));
  if (Cost >= BestCost) {
    LLVM_DEBUG({
      if (BestCand == NoCand)
        dbgs() << " worse than no bundles\n";
      else
        dbgs() << " worse than "
               << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
    });
    return BestCand;
  }
  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return BestCand;
  }

  SpillPlacer->finish();

  // No live bundles, defer to splitSingleBlocks().
  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << " no bundles.\n");
    return BestCand;
  }

  Cost += calcGlobalSplitCost(Cand, Order);
  LLVM_DEBUG({
    dbgs() << ", total = ";
    MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  if (Cost < BestCost) {
    BestCand = NumCands;
    BestCost = Cost;
  }
  ++NumCands;

  return BestCand;
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands,
                                      BestCand);
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

// VirtReg has a physical Hint, this function tries to split VirtReg around
// Hint if we can place new COPY instructions in cold blocks.
bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
                                     const LiveInterval &VirtReg,
                                     SmallVectorImpl<Register> &NewVRegs,
                                     AllocationOrder &Order) {
  BlockFrequency Cost = 0;
  Register Reg = VirtReg.reg();

  // Compute the cost of assigning a non-Hint physical register to VirtReg.
  // We define it as the total frequency of broken COPY instructions to/from
  // the Hint register; after the split, they can be deleted.
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!TII->isFullCopyInstr(Instr))
      continue;
    Register OtherReg = Instr.getOperand(1).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(0).getReg();
      if (OtherReg == Reg)
        continue;
      // Check if VirtReg interferes with OtherReg after this COPY instruction.
      if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
        continue;
    }
    MCRegister OtherPhysReg =
        OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
    if (OtherPhysReg == Hint)
      Cost += MBFI->getBlockFreq(Instr.getParent());
  }

  // Decrease the cost so it will be split in colder blocks.
  BranchProbability Threshold(SplitThresholdForRegWithHint, 100);
  Cost *= Threshold;
  if (Cost == 0)
    return false;

  unsigned NumCands = 0;
  unsigned BestCand = NoCand;
  SA->analyze(&VirtReg);
  calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand);
  if (BestCand == NoCand)
    return false;

  doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
  return true;
}

//===----------------------------------------------------------------------===//
// Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
// Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &MI, Register Reg) {
  LaneBitmask Mask;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.getReg() != Reg)
      continue;

    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      Mask |= MRI.getMaxLaneMaskForVReg(Reg);
      continue;
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use,
                            const TargetInstrInfo *TII) {
  // Early check the common case.
  auto DestSrc = TII->isCopyInstr(*MI);
  if (DestSrc &&
      DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return 0;
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (TII->isFullCopyInstr(*MI) ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

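// Illustrative (not from this file): if one instruction constrains a vreg to
// a small class such as x86's GR32_ABCD, splitting around that instruction
// lets the remaining fragments inflate back to GR32, where far more physical
// registers are available.
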
//===----------------------------------------------------------------------===//
// Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[Unit].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
    const LiveRange &LR = LIS->getRegUnit(Unit);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

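// E.g. with uses {A, B, C} there are two gaps; virtual interference that
// overlaps B raises both GapWeight[0] and GapWeight[1] to its spill weight,
// and fixed regunit interference pins the overlapped gaps at huge_valf so
// they are never chosen for a split.
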
/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - a phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
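        // The numerator counts the NewGaps + 1 instructions in the new range
        // (its uses plus the inserted boundary copies), each at blockFreq;
        // the denominator approximates the range's length, padded by one
        // InstrDist per live-in/live-out boundary.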
1681 // Would this split be possible to allocate?
1682 // Never allocate all gaps, we wouldn't be making progress.
1683 LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1684 if (EstWeight * Hysteresis >= MaxGap) {
1685 Shrink = false;
1686 float Diff = EstWeight - MaxGap;
1687 if (Diff > BestDiff) {
1688 LLVM_DEBUG(dbgs() << " (best)");
1689 BestDiff = Hysteresis * Diff;
1690 BestBefore = SplitBefore;
1691 BestAfter = SplitAfter;
1692 }
1693 }
1694 }
1695
1696 // Try to shrink.
1697 if (Shrink) {
1698 if (++SplitBefore < SplitAfter) {
1699 LLVM_DEBUG(dbgs() << " shrink\n");
1700 // Recompute the max when necessary.
1701 if (GapWeight[SplitBefore - 1] >= MaxGap) {
1702 MaxGap = GapWeight[SplitBefore];
1703 for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1704 MaxGap = std::max(MaxGap, GapWeight[I]);
1705 }
1706 continue;
1707 }
1708 MaxGap = 0;
1709 }
1710
1711 // Try to extend the interval.
1712 if (SplitAfter >= NumGaps) {
1713 LLVM_DEBUG(dbgs() << " end\n");
1714 break;
1715 }
1716
1717 LLVM_DEBUG(dbgs() << " extend\n");
1718 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1719 }
1720 }
1721
1722 // Didn't find any candidates?
1723 if (BestBefore == NumGaps)
1724 return 0;
1725
1726 LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1727 << Uses[BestAfter] << ", " << BestDiff << ", "
1728 << (BestAfter - BestBefore + 1) << " instrs\n");
1729
1730 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1731 SE->reset(LREdit);
1732
1733 SE->openIntv();
1734 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1735 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
1736 SE->useIntv(SegStart, SegStop);
1738 SE->finish(&IntvMap);
1739 DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1740 // If the new range has the same number of instructions as before, mark it as
1741 // RS_Split2 so the next split will be forced to make progress. Otherwise,
1742 // leave the new intervals as RS_New so they can compete.
1743 bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1744 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1745 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1746 if (NewGaps >= NumGaps) {
1747 LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1748 assert(!ProgressRequired && "Didn't make progress when it was required.");
1749 for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1750 if (IntvMap[I] == 1) {
1751 ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1752 LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1753 }
1754 LLVM_DEBUG(dbgs() << '\n');
1755 }
1756 ++NumLocalSplits;
1757
1758 return 0;
1759}
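
The shrink/extend loop above is an incremental search over windows of use gaps: a window is only a candidate when the estimated weight of the new interval beats the worst interference inside it, with the Hysteresis factor preventing flip-flopping on near-ties. For intuition, the same acceptance test can be written as a brute-force standalone sketch; the gap weights and the EstWeight formula below are made up for illustration and are not LLVM's.

// toy_local_split.cpp - brute-force version of the window search that
// tryLocalSplit performs incrementally. Illustrative only; toy numbers.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // One interference weight per gap between consecutive uses.
  const std::vector<float> GapWeight = {0.5f, 8.0f, 0.25f, 0.25f, 9.0f};
  const float Hysteresis = 0.98f;
  const unsigned NumGaps = unsigned(GapWeight.size());
  // BestBefore == NumGaps means "no candidate", as in the code above.
  unsigned BestBefore = NumGaps, BestAfter = NumGaps;
  float BestDiff = 0.0f;
  for (unsigned Before = 0; Before != NumGaps; ++Before) {
    for (unsigned After = Before + 1; After <= NumGaps; ++After) {
      // Worst interference the window would have to out-weigh.
      const float MaxGap = *std::max_element(GapWeight.begin() + Before,
                                             GapWeight.begin() + After);
      // Toy estimate: shorter new intervals get a larger spill weight.
      const float EstWeight = 4.0f / float(After - Before);
      if (EstWeight * Hysteresis >= MaxGap && EstWeight - MaxGap > BestDiff) {
        BestDiff = EstWeight - MaxGap;
        BestBefore = Before;
        BestAfter = After;
      }
    }
  }
  std::printf("best window: gaps [%u, %u), diff %.2f\n", BestBefore, BestAfter,
              BestDiff);
}

On these toy numbers the winner is the window covering only gap 2; the real loop converges on the same kind of answer without enumerating every window.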
1760
1761//===----------------------------------------------------------------------===//
1762// Live Range Splitting
1763//===----------------------------------------------------------------------===//
1764
1765/// trySplit - Try to split VirtReg or one of its interferences, making it
1766/// assignable.
1767 /// @return A physreg if VirtReg may be assigned, and/or new vregs in NewVRegs.
1768unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1769 SmallVectorImpl<Register> &NewVRegs,
1770 const SmallVirtRegSet &FixedRegisters) {
1771 // Ranges must be Split2 or less.
1772 if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1773 return 0;
1774
1775 // Local intervals are handled separately.
1776 if (LIS->intervalIsInOneMBB(VirtReg)) {
1777 NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1778 TimerGroupDescription, TimePassesIsEnabled);
1779 SA->analyze(&VirtReg);
1780 Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1781 if (PhysReg || !NewVRegs.empty())
1782 return PhysReg;
1783 return tryInstructionSplit(VirtReg, Order, NewVRegs);
1784 }
1785
1786 NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1787 TimerGroupDescription, TimePassesIsEnabled);
1788
1789 SA->analyze(&VirtReg);
1790
1791 // First try to split around a region spanning multiple blocks. RS_Split2
1792 // ranges already made dubious progress with region splitting, so they go
1793 // straight to single block splitting.
1794 if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1795 MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1796 if (PhysReg || !NewVRegs.empty())
1797 return PhysReg;
1798 }
1799
1800 // Then isolate blocks.
1801 return tryBlockSplit(VirtReg, Order, NewVRegs);
1802}
1803
1804//===----------------------------------------------------------------------===//
1805// Last Chance Recoloring
1806//===----------------------------------------------------------------------===//
1807
1808/// Return true if \p reg has any tied def operand.
1809static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1810 for (const MachineOperand &MO : MRI->def_operands(reg))
1811 if (MO.isTied())
1812 return true;
1813
1814 return false;
1815}
1816
1817/// Return true if the existing assignment of \p Intf overlaps, but is not the
1818/// same, as \p PhysReg.
1819 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1820 const VirtRegMap &VRM,
1821 MCRegister PhysReg,
1822 const LiveInterval &Intf) {
1823 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1824 if (PhysReg == AssignedReg)
1825 return false;
1826 return TRI.regsOverlap(PhysReg, AssignedReg);
1827}
1828
1829/// mayRecolorAllInterferences - Check if the virtual registers that
1830/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1831/// recolored to free \p PhysReg.
1832/// When true is returned, \p RecoloringCandidates has been augmented with all
1833/// the live intervals that need to be recolored in order to free \p PhysReg
1834/// for \p VirtReg.
1835/// \p FixedRegisters contains all the virtual registers that cannot be
1836/// recolored.
1837bool RAGreedy::mayRecolorAllInterferences(
1838 MCRegister PhysReg, const LiveInterval &VirtReg,
1839 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1840 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1841
1842 for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
1843 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
1844 // If there is LastChanceRecoloringMaxInterference or more interferences,
1845 // chances are one would not be recolorable.
1846 if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1847 LastChanceRecoloringMaxInterference &&
1848 !ExhaustiveSearch) {
1849 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1850 CutOffInfo |= CO_Interf;
1851 return false;
1852 }
1853 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1854 // If Intf is done and sits in the same register class as VirtReg, it
1855 // would not be recolorable, as it is in the same state as
1856 // VirtReg. However, there are at least two exceptions.
1857 //
1858 // If VirtReg has tied defs and Intf doesn't, then
1859 // there is still a point in examining whether it can be recolored.
1860 //
1861 // Additionally, if the register class has overlapping tuple members, it
1862 // may still be recolorable using a different tuple. This is more likely
1863 // if the existing assignment aliases with the candidate.
1864 //
1865 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1866 MRI->getRegClass(Intf->reg()) == CurRC &&
1867 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1868 !(hasTiedDef(MRI, VirtReg.reg()) &&
1869 !hasTiedDef(MRI, Intf->reg()))) ||
1870 FixedRegisters.count(Intf->reg())) {
1871 LLVM_DEBUG(
1872 dbgs() << "Early abort: the interference is not recolorable.\n");
1873 return false;
1874 }
1875 RecoloringCandidates.insert(Intf);
1876 }
1877 }
1878 return true;
1879}
1880
1881/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1882/// its interferences.
1883/// Last chance recoloring chooses a color for \p VirtReg and recolors every
1884/// virtual register that was using it. The recoloring process may recursively
1885/// use the last chance recoloring. Therefore, when a virtual register has been
1886/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1887/// be last-chance-recolored again during this recoloring "session".
1888/// E.g.,
1889/// Let
1890/// vA can use {R1, R2 }
1891/// vB can use { R2, R3}
1892/// vC can use {R1 }
1893/// Where vA, vB, and vC cannot be split anymore (they are reloads for
1894/// instance) and they all interfere.
1895///
1896/// vA is assigned R1
1897/// vB is assigned R2
1898/// vC tries to evict vA but vA is already done.
1899/// Regular register allocation fails.
1900///
1901/// Last chance recoloring kicks in:
1902 /// vC acts as if vA were evicted => vC uses R1.
1903/// vC is marked as fixed.
1904/// vA needs to find a color.
1905/// None are available.
1906/// vA cannot evict vC: vC is a fixed virtual register now.
1907 /// vA acts as if vB were evicted => vA uses R2.
1908/// vB needs to find a color.
1909/// R3 is available.
1910/// Recoloring => vC = R1, vA = R2, vB = R3
1911///
1912/// \p Order defines the preferred allocation order for \p VirtReg.
1913 /// \p NewRegs will contain any new virtual registers that have been created
1914/// (split, spill) during the process and that must be assigned.
1915/// \p FixedRegisters contains all the virtual registers that cannot be
1916/// recolored.
1917///
1918/// \p RecolorStack tracks the original assignments of successfully recolored
1919/// registers.
1920///
1921/// \p Depth gives the current depth of the last chance recoloring.
1922/// \return a physical register that can be used for VirtReg or ~0u if none
1923/// exists.
1924unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1925 AllocationOrder &Order,
1926 SmallVectorImpl<Register> &NewVRegs,
1927 SmallVirtRegSet &FixedRegisters,
1928 RecoloringStack &RecolorStack,
1929 unsigned Depth) {
1930 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1931 return ~0u;
1932
1933 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1934
1935 const ssize_t EntryStackSize = RecolorStack.size();
1936
1937 // Ranges must be Done.
1938 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1939 "Last chance recoloring should really be last chance");
1940 // Set the max depth to LastChanceRecoloringMaxDepth.
1941 // We may want to reconsider that if we end up with too large a search space
1942 // for targets with hundreds of registers.
1943 // Indeed, in that case we may want to cut the search space earlier.
1944 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1945 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1946 CutOffInfo |= CO_Depth;
1947 return ~0u;
1948 }
1949
1950 // Set of live intervals that will need to be recolored.
1951 SmallLISet RecoloringCandidates;
1952
1953 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1954 // this recoloring "session".
1955 assert(!FixedRegisters.count(VirtReg.reg()));
1956 FixedRegisters.insert(VirtReg.reg());
1957 SmallVector<Register, 4> CurrentNewVRegs;
1958
1959 for (MCRegister PhysReg : Order) {
1960 assert(PhysReg.isValid());
1961 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1962 << printReg(PhysReg, TRI) << '\n');
1963 RecoloringCandidates.clear();
1964 CurrentNewVRegs.clear();
1965
1966 // It is only possible to recolor virtual register interference.
1967 if (Matrix->checkInterference(VirtReg, PhysReg) >
1968 LiveRegMatrix::IK_VirtReg) {
1969 LLVM_DEBUG(
1970 dbgs() << "Some interferences are not with virtual registers.\n");
1971
1972 continue;
1973 }
1974
1975 // Early give up on this PhysReg if it is obvious we cannot recolor all
1976 // the interferences.
1977 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
1978 FixedRegisters)) {
1979 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
1980 continue;
1981 }
1982
1983 // RecoloringCandidates contains all the virtual registers that interfere
1984 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
1985 // recoloring and perform the actual recoloring.
1986 PQueue RecoloringQueue;
1987 for (const LiveInterval *RC : RecoloringCandidates) {
1988 Register ItVirtReg = RC->reg();
1989 enqueue(RecoloringQueue, RC);
1990 assert(VRM->hasPhys(ItVirtReg) &&
1991 "Interferences are supposed to be with allocated variables");
1992
1993 // Record the current allocation.
1994 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
1995
1996 // Unassign the interference so its physical register becomes available.
1997 Matrix->unassign(*RC);
1998 }
1999
2000 // Act as if VirtReg were assigned to PhysReg so that the underlying
2001 // recoloring has the right information about the interferences and
2002 // available colors.
2003 Matrix->assign(VirtReg, PhysReg);
2004
2005 // Save the current recoloring state.
2006 // If we cannot recolor all the interferences, we will have to start again
2007 // at this point for the next physical register.
2008 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
2009 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
2010 FixedRegisters, RecolorStack, Depth)) {
2011 // Push the queued vregs into the main queue.
2012 for (Register NewVReg : CurrentNewVRegs)
2013 NewVRegs.push_back(NewVReg);
2014 // Do not mess up the global assignment process.
2015 // I.e., VirtReg must be unassigned.
2016 Matrix->unassign(VirtReg);
2017 return PhysReg;
2018 }
2019
2020 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
2021 << printReg(PhysReg, TRI) << '\n');
2022
2023 // The recoloring attempt failed, undo the changes.
2024 FixedRegisters = SaveFixedRegisters;
2025 Matrix->unassign(VirtReg);
2026
2027 // For a newly created vreg which is also in RecoloringCandidates,
2028 // don't add it to NewVRegs because its physical register will be restored
2029 // below. Other vregs in CurrentNewVRegs are created by calling
2030 // selectOrSplit and should be added into NewVRegs.
2031 for (Register R : CurrentNewVRegs) {
2032 if (RecoloringCandidates.count(&LIS->getInterval(R)))
2033 continue;
2034 NewVRegs.push_back(R);
2035 }
2036
2037 // Roll back our unsuccessful recoloring. Also roll back any successful
2038 // recolorings in any recursive recoloring attempts, since it's possible
2039 // they would have introduced conflicts with assignments we will be
2040 // restoring further up the stack. Perform all unassignments prior to
2041 // reassigning, since sub-recolorings may have conflicted with the registers
2042 // we are going to restore to their original assignments.
2043 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
2044 const LiveInterval *LI;
2045 MCRegister PhysReg;
2046 std::tie(LI, PhysReg) = RecolorStack[I];
2047
2048 if (VRM->hasPhys(LI->reg()))
2049 Matrix->unassign(*LI);
2050 }
2051
2052 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
2053 const LiveInterval *LI;
2054 MCRegister PhysReg;
2055 std::tie(LI, PhysReg) = RecolorStack[I];
2056 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
2057 Matrix->assign(*LI, PhysReg);
2058 }
2059
2060 // Pop the stack of recoloring attempts.
2061 RecolorStack.resize(EntryStackSize);
2062 }
2063
2064 // Last chance recoloring did not work either; give up.
2065 return ~0u;
2066}
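
The vA/vB/vC walkthrough in the comment block above can be replayed as a small standalone backtracking search. This is a minimal sketch of the idea only, assuming toy names and a fully connected interference graph; the Fixed set plays the role of FixedRegisters, and none of the real cost modeling, depth cutoffs, or LiveRegMatrix bookkeeping appears here.

// toy_recolor.cpp - the vA/vB/vC example above as a tiny backtracking
// search. Illustrative only: every vreg is assumed to interfere with
// every other, and a vreg recolored once in a session stays "fixed".
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

using Assignment = std::map<std::string, int>;

bool recolor(const std::string &V,
             const std::map<std::string, std::vector<int>> &Allowed,
             Assignment &Assign, std::set<std::string> &Fixed) {
  Fixed.insert(V); // may not be displaced again in this session
  for (int R : Allowed.at(V)) {
    // Everyone currently holding R would have to move.
    std::vector<std::string> Evicted;
    bool AnyFixed = false;
    for (const auto &[Other, Color] : Assign)
      if (Other != V && Color == R) {
        Evicted.push_back(Other);
        AnyFixed |= Fixed.count(Other) != 0;
      }
    if (AnyFixed)
      continue; // cannot displace a fixed vreg; try the next color
    Assignment Saved = Assign;
    Assign[V] = R;
    bool OK = true;
    for (const auto &E : Evicted)
      OK = OK && recolor(E, Allowed, Assign, Fixed);
    if (OK)
      return true;
    Assign = Saved; // roll back this color and try the next one
  }
  return false;
}

int main() {
  const std::map<std::string, std::vector<int>> Allowed = {
      {"vA", {1, 2}}, {"vB", {2, 3}}, {"vC", {1}}};
  Assignment Assign = {{"vA", 1}, {"vB", 2}}; // vC failed regular allocation
  std::set<std::string> Fixed;
  if (recolor("vC", Allowed, Assign, Fixed))
    for (const auto &[V, R] : Assign)
      std::printf("%s -> R%d\n", V.c_str(), R);
}

Running it prints vA -> R2, vB -> R3, vC -> R1, the assignment the comment above derives by hand.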
2067
2068/// tryRecoloringCandidates - Try to assign a new color to every register
2069 /// in \p RecoloringQueue.
2070/// \p NewRegs will contain any new virtual register created during the
2071/// recoloring process.
2072/// \p FixedRegisters[in/out] contains all the registers that have been
2073/// recolored.
2074/// \return true if all virtual registers in RecoloringQueue were successfully
2075/// recolored, false otherwise.
2076bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2077 SmallVectorImpl<Register> &NewVRegs,
2078 SmallVirtRegSet &FixedRegisters,
2079 RecoloringStack &RecolorStack,
2080 unsigned Depth) {
2081 while (!RecoloringQueue.empty()) {
2082 const LiveInterval *LI = dequeue(RecoloringQueue);
2083 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2084 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2085 RecolorStack, Depth + 1);
2086 // When splitting happens, the live-range may actually be empty.
2087 // In that case, it is okay to continue the recoloring even
2088 // if we did not find an alternative color for it. Indeed,
2089 // there will not be anything to color for LI in the end.
2090 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2091 return false;
2092
2093 if (!PhysReg) {
2094 assert(LI->empty() && "Only an empty live-range does not require a register");
2095 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2096 << " succeeded. Empty LI.\n");
2097 continue;
2098 }
2099 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2100 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2101
2102 Matrix->assign(*LI, PhysReg);
2103 FixedRegisters.insert(LI->reg());
2104 }
2105 return true;
2106}
2107
2108//===----------------------------------------------------------------------===//
2109// Main Entry Point
2110//===----------------------------------------------------------------------===//
2111
2112 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2113 SmallVectorImpl<Register> &NewVRegs) {
2114 CutOffInfo = CO_None;
2115 LLVMContext &Ctx = MF->getFunction().getContext();
2116 SmallVirtRegSet FixedRegisters;
2117 RecoloringStack RecolorStack;
2118 MCRegister Reg =
2119 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2120 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2121 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2122 if (CutOffEncountered == CO_Depth)
2123 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2124 "reached. Use -fexhaustive-register-search to skip "
2125 "cutoffs");
2126 else if (CutOffEncountered == CO_Interf)
2127 Ctx.emitError("register allocation failed: maximum interference for "
2128 "recoloring reached. Use -fexhaustive-register-search "
2129 "to skip cutoffs");
2130 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2131 Ctx.emitError("register allocation failed: maximum interference and "
2132 "depth for recoloring reached. Use "
2133 "-fexhaustive-register-search to skip cutoffs");
2134 }
2135 return Reg;
2136}
2137
2138/// Using a CSR for the first time has a cost because it causes push|pop
2139/// to be added to prologue|epilogue. Splitting a cold section of the live
2140/// range can have lower cost than using the CSR for the first time;
2141/// Spilling a live range in the cold path can have lower cost than using
2142/// the CSR for the first time. Returns the physical register if we decide
2143/// to use the CSR; otherwise return 0.
2144MCRegister RAGreedy::tryAssignCSRFirstTime(
2145 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2146 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2147 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2148 // We choose spill over using the CSR for the first time if the spill cost
2149 // is lower than CSRCost.
2150 SA->analyze(&VirtReg);
2151 if (calcSpillCost() >= CSRCost)
2152 return PhysReg;
2153
2154 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2155 // we will not use a callee-saved register in tryEvict.
2156 CostPerUseLimit = 1;
2157 return 0;
2158 }
2159 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2160 // We choose pre-splitting over using the CSR for the first time if
2161 // the cost of splitting is lower than CSRCost.
2162 SA->analyze(&VirtReg);
2163 unsigned NumCands = 0;
2164 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2165 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2166 NumCands, true /*IgnoreCSR*/);
2167 if (BestCand == NoCand)
2168 // Use the CSR if we can't find a region split below CSRCost.
2169 return PhysReg;
2170
2171 // Perform the actual pre-splitting.
2172 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2173 return 0;
2174 }
2175 return PhysReg;
2176}
2177
2178 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2179 // Do not keep invalid information around.
2180 SetOfBrokenHints.remove(&LI);
2181}
2182
2183void RAGreedy::initializeCSRCost() {
2184 // We use the larger of the command-line option and the value reported
2185 // by TRI.
2186 CSRCost = BlockFrequency(
2187 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2188 if (!CSRCost.getFrequency())
2189 return;
2190
2191 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2192 uint64_t ActualEntry = MBFI->getEntryFreq();
2193 if (!ActualEntry) {
2194 CSRCost = 0;
2195 return;
2196 }
2197 uint64_t FixedEntry = 1 << 14;
2198 if (ActualEntry < FixedEntry)
2199 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2200 else if (ActualEntry <= UINT32_MAX)
2201 // Invert the fraction and divide.
2202 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2203 else
2204 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2205 CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2206}
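
A worked example of the scaling above, with made-up numbers: raw costs are interpreted relative to the fixed entry frequency 2^14 = 16384, so a function whose actual entry frequency is 4096 sees its CSR threshold shrink by the ratio 4096/16384.

// csr_scale.cpp - the arithmetic of initializeCSRCost with hypothetical
// numbers; the real code uses BlockFrequency/BranchProbability types.
#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t FixedEntry = 1 << 14; // the 2^14 reference point above
  const uint64_t ActualEntry = 4096;   // hypothetical MBFI entry frequency
  const double RawCSRCost = 5.0;       // hypothetical raw cost
  // 5 * 4096 / 16384 = 1.25
  std::printf("scaled CSR cost: %.2f\n",
              RawCSRCost * double(ActualEntry) / double(FixedEntry));
}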
2207
2208/// Collect the hint info for \p Reg.
2209/// The results are stored into \p Out.
2210/// \p Out is not cleared before being populated.
2211void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2212 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2213 if (!TII->isFullCopyInstr(Instr))
2214 continue;
2215 // Look for the other end of the copy.
2216 Register OtherReg = Instr.getOperand(0).getReg();
2217 if (OtherReg == Reg) {
2218 OtherReg = Instr.getOperand(1).getReg();
2219 if (OtherReg == Reg)
2220 continue;
2221 }
2222 // Get the current assignment.
2223 MCRegister OtherPhysReg =
2224 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2225 // Push the collected information.
2226 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2227 OtherPhysReg));
2228 }
2229}
2230
2231/// Using the given \p List, compute the cost of the broken hints if
2232/// \p PhysReg was used.
2233/// \return The cost of \p List for \p PhysReg.
2234BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2235 MCRegister PhysReg) {
2236 BlockFrequency Cost = 0;
2237 for (const HintInfo &Info : List) {
2238 if (Info.PhysReg != PhysReg)
2239 Cost += Info.Freq;
2240 }
2241 return Cost;
2242}
2243
2244/// Using the register assigned to \p VirtReg, try to recolor
2245/// all the live ranges that are copy-related with \p VirtReg.
2246/// The recoloring is then propagated to all the live-ranges that have
2247/// been recolored and so on, until no more copies can be coalesced or
2248/// it is not profitable.
2249/// For a given live range, profitability is determined by the sum of the
2250/// frequencies of the non-identity copies it would introduce with the old
2251/// and new register.
2252void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2253 // We have a broken hint, check if it is possible to fix it by
2254 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2255 // some register and PhysReg may be available for the other live-ranges.
2256 SmallSet<Register, 4> Visited;
2257 SmallVector<unsigned, 2> RecoloringCandidates;
2258 HintsInfo Info;
2259 Register Reg = VirtReg.reg();
2260 MCRegister PhysReg = VRM->getPhys(Reg);
2261 // Start the recoloring algorithm from the input live-interval, then
2262 // it will propagate to the ones that are copy-related with it.
2263 Visited.insert(Reg);
2264 RecoloringCandidates.push_back(Reg);
2265
2266 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2267 << '(' << printReg(PhysReg, TRI) << ")\n");
2268
2269 do {
2270 Reg = RecoloringCandidates.pop_back_val();
2271
2272 // We cannot recolor physical registers.
2273 if (Reg.isPhysical())
2274 continue;
2275
2276 // This may be a skipped class
2277 if (!VRM->hasPhys(Reg)) {
2278 assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
2279 "We have an unallocated variable which should have been handled");
2280 continue;
2281 }
2282
2283 // Get the live interval mapped with this virtual register to be able
2284 // to check for the interference with the new color.
2285 LiveInterval &LI = LIS->getInterval(Reg);
2286 MCRegister CurrPhys = VRM->getPhys(Reg);
2287 // Check that the new color matches the register class constraints and
2288 // that it is free for this live range.
2289 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2290 Matrix->checkInterference(LI, PhysReg)))
2291 continue;
2292
2293 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2294 << ") is recolorable.\n");
2295
2296 // Gather the hint info.
2297 Info.clear();
2298 collectHintInfo(Reg, Info);
2299 // Check if recoloring the live-range will increase the cost of the
2300 // non-identity copies.
2301 if (CurrPhys != PhysReg) {
2302 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2303 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2304 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2305 LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2306 << "\nNew Cost: " << NewCopiesCost.getFrequency()
2307 << '\n');
2308 if (OldCopiesCost < NewCopiesCost) {
2309 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2310 continue;
2311 }
2312 // At this point, the cost is either cheaper or equal. If it is
2313 // equal, we consider this is profitable because it may expose
2314 // more recoloring opportunities.
2315 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2316 // Recolor the live-range.
2317 Matrix->unassign(LI);
2318 Matrix->assign(LI, PhysReg);
2319 }
2320 // Push all copy-related live-ranges to keep reconciling the broken
2321 // hints.
2322 for (const HintInfo &HI : Info) {
2323 if (Visited.insert(HI.Reg).second)
2324 RecoloringCandidates.push_back(HI.Reg);
2325 }
2326 } while (!RecoloringCandidates.empty());
2327}
2328
2329/// Try to recolor broken hints.
2330/// Broken hints may be repaired by recoloring when an evicted variable
2331/// freed up a register for a larger live-range.
2332/// Consider the following example:
2333/// BB1:
2334/// a =
2335/// b =
2336/// BB2:
2337/// ...
2338/// = b
2339/// = a
2340/// Let us assume b gets split:
2341/// BB1:
2342/// a =
2343/// b =
2344/// BB2:
2345/// c = b
2346/// ...
2347/// d = c
2348/// = d
2349/// = a
2350 /// Because of how the allocation works, b, c, and d may be assigned different
2351/// colors. Now, if a gets evicted later:
2352/// BB1:
2353/// a =
2354/// st a, SpillSlot
2355/// b =
2356/// BB2:
2357/// c = b
2358/// ...
2359/// d = c
2360/// = d
2361/// e = ld SpillSlot
2362/// = e
2363 /// It is likely that we can assign the same register to b, c, and d,
2364 /// getting rid of 2 copies.
2365void RAGreedy::tryHintsRecoloring() {
2366 for (const LiveInterval *LI : SetOfBrokenHints) {
2367 assert(LI->reg().isVirtual() &&
2368 "Recoloring is possible only for virtual registers");
2369 // Some dead defs may be around (e.g., because of debug uses).
2370 // Ignore those.
2371 if (!VRM->hasPhys(LI->reg()))
2372 continue;
2373 tryHintRecoloring(*LI);
2374 }
2375}
2376
2377MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2378 SmallVectorImpl<Register> &NewVRegs,
2379 SmallVirtRegSet &FixedRegisters,
2380 RecoloringStack &RecolorStack,
2381 unsigned Depth) {
2382 uint8_t CostPerUseLimit = uint8_t(~0u);
2383 // First try assigning a free register.
2384 auto Order =
2385 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2386 if (MCRegister PhysReg =
2387 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2388 // When NewVRegs is not empty, we may have made decisions such as evicting
2389 // a virtual register; go with the earlier decisions and use the physical
2390 // register.
2391 if (CSRCost.getFrequency() &&
2392 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2393 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2394 CostPerUseLimit, NewVRegs);
2395 if (CSRReg || !NewVRegs.empty())
2396 // Return now if we decide to use a CSR or create new vregs due to
2397 // pre-splitting.
2398 return CSRReg;
2399 } else
2400 return PhysReg;
2401 }
2402 // Non-empty NewVRegs means VirtReg has been split.
2403 if (!NewVRegs.empty())
2404 return 0;
2405
2406 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2407 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2408 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2409
2410 // Try to evict a less worthy live range, but only for ranges from the primary
2411 // queue. The RS_Split ranges already failed to do this, and they should not
2412 // get a second chance until they have been split.
2413 if (Stage != RS_Split)
2414 if (Register PhysReg =
2415 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2416 FixedRegisters)) {
2417 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2418 // If VirtReg has a hint and that hint is broken record this
2419 // virtual register as a recoloring candidate for broken hint.
2420 // Indeed, since we evicted a variable in its neighborhood it is
2421 // likely we can at least partially recolor some of the
2422 // copy-related live-ranges.
2423 if (Hint && Hint != PhysReg)
2424 SetOfBrokenHints.insert(&VirtReg);
2425 return PhysReg;
2426 }
2427
2428 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2429
2430 // The first time we see a live range, don't try to split or spill.
2431 // Wait until the second time, when all smaller ranges have been allocated.
2432 // This gives a better picture of the interference to split around.
2433 if (Stage < RS_Split) {
2434 ExtraInfo->setStage(VirtReg, RS_Split);
2435 LLVM_DEBUG(dbgs() << "wait for second round\n");
2436 NewVRegs.push_back(VirtReg.reg());
2437 return 0;
2438 }
2439
2440 if (Stage < RS_Spill) {
2441 // Try splitting VirtReg or interferences.
2442 unsigned NewVRegSizeBefore = NewVRegs.size();
2443 Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2444 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2445 return PhysReg;
2446 }
2447
2448 // If we couldn't allocate a register from spilling, there is probably some
2449 // invalid inline assembly. The base class will report it.
2450 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2451 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2452 RecolorStack, Depth);
2453 }
2454
2455 // Finally spill VirtReg itself.
2456 if ((EnableDeferredSpilling ||
2457 TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2458 ExtraInfo->getStage(VirtReg) < RS_Memory) {
2459 // TODO: This is experimental and in particular, we do not model
2460 // the live range splitting done by spilling correctly.
2461 // We would need a deep integration with the spiller to do the
2462 // right thing here. Anyway, that is still good for early testing.
2463 ExtraInfo->setStage(VirtReg, RS_Memory);
2464 LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2465 NewVRegs.push_back(VirtReg.reg());
2466 } else {
2467 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2468 TimerGroupDescription, TimePassesIsEnabled);
2469 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2470 spiller().spill(LRE);
2471 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2472
2473 // Tell LiveDebugVariables about the new ranges. Ranges not being covered by
2474 // the new regs are kept in LDV (still mapping to the old register), until
2475 // we rewrite spilled locations in LDV at a later stage.
2476 DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2477
2478 if (VerifyEnabled)
2479 MF->verify(this, "After spilling");
2480 }
2481
2482 // The live virtual register requesting allocation was spilled, so tell
2483 // the caller not to allocate anything during this round.
2484 return 0;
2485}
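
Stripped of eviction advice, hint handling, and CSR costs, the cascade above reduces to a small state machine over the live-range stages. A simplified sketch follows; the helper is hypothetical, not LLVM's API, and only the stage names mirror the LiveRangeStage enum used above.

// stage_cascade.cpp - one allocation round of selectOrSplitImpl, reduced
// to its stage transitions. Sketch only; the bool inputs are stand-ins
// for the real queries (free register found, eviction possible, split
// produced new vregs).
#include <cstdio>

enum Stage { RS_New, RS_Assign, RS_Split, RS_Split2, RS_Spill, RS_Memory, RS_Done };

const char *oneRound(Stage &S, bool FreeRegAvailable, bool CanEvict,
                     bool SplitHelped, bool Spillable) {
  if (FreeRegAvailable)
    return "assign a free register";
  if (S != RS_Split && CanEvict)
    return "evict a cheaper range and assign";
  if (S < RS_Split) {
    S = RS_Split; // wait for the second round so interference is visible
    return "requeue for the split round";
  }
  if (S < RS_Spill && SplitHelped)
    return "split and requeue the pieces";
  if (S >= RS_Done || !Spillable)
    return "try last chance recoloring";
  S = RS_Done;
  return "spill";
}

int main() {
  Stage S = RS_Assign;
  std::printf("%s\n", oneRound(S, false, false, false, true)); // requeue
  std::printf("%s\n", oneRound(S, false, false, false, true)); // spill
}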
2486
2487void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2488 using namespace ore;
2489 if (Spills) {
2490 R << NV("NumSpills", Spills) << " spills ";
2491 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2492 }
2493 if (FoldedSpills) {
2494 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2495 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2496 << " total folded spills cost ";
2497 }
2498 if (Reloads) {
2499 R << NV("NumReloads", Reloads) << " reloads ";
2500 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2501 }
2502 if (FoldedReloads) {
2503 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2504 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2505 << " total folded reloads cost ";
2506 }
2507 if (ZeroCostFoldedReloads)
2508 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2509 << " zero cost folded reloads ";
2510 if (Copies) {
2511 R << NV("NumVRCopies", Copies) << " virtual register copies ";
2512 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2513 }
2514}
2515
2516RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2517 RAGreedyStats Stats;
2518 const MachineFrameInfo &MFI = MF->getFrameInfo();
2519 int FI;
2520
2521 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2522 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2523 A->getPseudoValue())->getFrameIndex());
2524 };
2525 auto isPatchpointInstr = [](const MachineInstr &MI) {
2526 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2527 MI.getOpcode() == TargetOpcode::STACKMAP ||
2528 MI.getOpcode() == TargetOpcode::STATEPOINT;
2529 };
2530 for (MachineInstr &MI : MBB) {
2531 auto DestSrc = TII->isCopyInstr(MI);
2532 if (DestSrc) {
2533 const MachineOperand &Dest = *DestSrc->Destination;
2534 const MachineOperand &Src = *DestSrc->Source;
2535 Register SrcReg = Src.getReg();
2536 Register DestReg = Dest.getReg();
2537 // Only count `COPY`s with a virtual register as source or destination.
2538 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2539 if (SrcReg.isVirtual()) {
2540 SrcReg = VRM->getPhys(SrcReg);
2541 if (SrcReg && Src.getSubReg())
2542 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2543 }
2544 if (DestReg.isVirtual()) {
2545 DestReg = VRM->getPhys(DestReg);
2546 if (DestReg && Dest.getSubReg())
2547 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2548 }
2549 if (SrcReg != DestReg)
2550 ++Stats.Copies;
2551 }
2552 continue;
2553 }
2554
2555 SmallVector<const MachineMemOperand *, 2> Accesses;
2556 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2557 ++Stats.Reloads;
2558 continue;
2559 }
2560 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2561 ++Stats.Spills;
2562 continue;
2563 }
2564 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2565 llvm::any_of(Accesses, isSpillSlotAccess)) {
2566 if (!isPatchpointInstr(MI)) {
2567 Stats.FoldedReloads += Accesses.size();
2568 continue;
2569 }
2570 // For statepoints there may be folded and zero-cost folded stack reloads.
2571 std::pair<unsigned, unsigned> NonZeroCostRange =
2572 TII->getPatchpointUnfoldableRange(MI);
2573 SmallSet<unsigned, 16> FoldedReloads;
2574 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2575 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2576 MachineOperand &MO = MI.getOperand(Idx);
2577 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2578 continue;
2579 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2580 FoldedReloads.insert(MO.getIndex());
2581 else
2582 ZeroCostFoldedReloads.insert(MO.getIndex());
2583 }
2584 // If a stack slot is used in a folded reload, it is not zero cost.
2585 for (unsigned Slot : FoldedReloads)
2586 ZeroCostFoldedReloads.erase(Slot);
2587 Stats.FoldedReloads += FoldedReloads.size();
2588 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2589 continue;
2590 }
2591 Accesses.clear();
2592 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2593 llvm::any_of(Accesses, isSpillSlotAccess)) {
2594 Stats.FoldedSpills += Accesses.size();
2595 }
2596 }
2597 // Scale the collected statistics by the relative frequency of this
2598 // basic block.
2599 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2600 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2601 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2602 Stats.SpillsCost = RelFreq * Stats.Spills;
2603 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2604 Stats.CopiesCost = RelFreq * Stats.Copies;
2605 return Stats;
2606}
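
The cost fields filled in just above are plain frequency weightings. A tiny worked example, assuming a block executed roughly eight times per function entry:

// stats_cost.cpp - the computeStats weighting with hypothetical numbers.
#include <cstdio>

int main() {
  // Hypothetical getBlockFreqRelativeToEntryBlock result.
  const float RelFreq = 8.0f;
  const unsigned Spills = 3, Reloads = 2;
  std::printf("SpillsCost=%.1f ReloadsCost=%.1f\n", RelFreq * Spills,
              RelFreq * Reloads);
}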
2607
2608RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2609 RAGreedyStats Stats;
2610
2611 // Sum up the spills and reloads in subloops.
2612 for (MachineLoop *SubLoop : *L)
2613 Stats.add(reportStats(SubLoop));
2614
2615 for (MachineBasicBlock *MBB : L->getBlocks())
2616 // Handle blocks that were not included in subloops.
2617 if (Loops->getLoopFor(MBB) == L)
2618 Stats.add(computeStats(*MBB));
2619
2620 if (!Stats.isEmpty()) {
2621 using namespace ore;
2622
2623 ORE->emit([&]() {
2624 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2625 L->getStartLoc(), L->getHeader());
2626 Stats.report(R);
2627 R << "generated in loop";
2628 return R;
2629 });
2630 }
2631 return Stats;
2632}
2633
2634void RAGreedy::reportStats() {
2635 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2636 return;
2637 RAGreedyStats Stats;
2638 for (MachineLoop *L : *Loops)
2639 Stats.add(reportStats(L));
2640 // Process non-loop blocks.
2641 for (MachineBasicBlock &MBB : *MF)
2642 if (!Loops->getLoopFor(&MBB))
2643 Stats.add(computeStats(MBB));
2644 if (!Stats.isEmpty()) {
2645 using namespace ore;
2646
2647 ORE->emit([&]() {
2648 DebugLoc Loc;
2649 if (auto *SP = MF->getFunction().getSubprogram())
2650 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2651 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2652 &MF->front());
2653 Stats.report(R);
2654 R << "generated in function";
2655 return R;
2656 });
2657 }
2658}
2659
2660bool RAGreedy::hasVirtRegAlloc() {
2661 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2662 Register Reg = Register::index2VirtReg(I);
2663 if (MRI->reg_nodbg_empty(Reg))
2664 continue;
2665 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2666 if (!RC)
2667 continue;
2668 if (ShouldAllocateClass(*TRI, *RC))
2669 return true;
2670 }
2671
2672 return false;
2673}
2674
2675 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2676 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2677 << "********** Function: " << mf.getName() << '\n');
2678
2679 MF = &mf;
2680 TII = MF->getSubtarget().getInstrInfo();
2681
2682 if (VerifyEnabled)
2683 MF->verify(this, "Before greedy register allocator");
2684
2685 RegAllocBase::init(getAnalysis<VirtRegMap>(),
2686 getAnalysis<LiveIntervals>(),
2687 getAnalysis<LiveRegMatrix>());
2688
2689 // Early return if there is no virtual register to be allocated to a
2690 // physical register.
2691 if (!hasVirtRegAlloc())
2692 return false;
2693
2694 Indexes = &getAnalysis<SlotIndexes>();
2695 // Renumber to get accurate and consistent results from
2696 // SlotIndexes::getApproxInstrDistance.
2697 Indexes->packIndexes();
2698 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2699 DomTree = &getAnalysis<MachineDominatorTree>();
2700 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2701 Loops = &getAnalysis<MachineLoopInfo>();
2702 Bundles = &getAnalysis<EdgeBundles>();
2703 SpillPlacer = &getAnalysis<SpillPlacement>();
2704 DebugVars = &getAnalysis<LiveDebugVariables>();
2705
2706 initializeCSRCost();
2707
2708 RegCosts = TRI->getRegisterCosts(*MF);
2709 RegClassPriorityTrumpsGlobalness =
2710 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2711 ? GreedyRegClassPriorityTrumpsGlobalness
2712 : TRI->regClassPriorityTrumpsGlobalness(*MF);
2713
2714 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2715 ? GreedyReverseLocalAssignment
2716 : TRI->reverseLocalAssignment();
2717
2718 ExtraInfo.emplace();
2719 EvictAdvisor =
2720 getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2721 PriorityAdvisor =
2722 getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2723
2724 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2725 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2726
2727 VRAI->calculateSpillWeightsAndHints();
2728
2729 LLVM_DEBUG(LIS->dump());
2730
2731 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2732 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2733
2734 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2735 GlobalCand.resize(32); // This will grow as needed.
2736 SetOfBrokenHints.clear();
2737
2738 allocatePhysRegs();
2739 tryHintsRecoloring();
2740
2741 if (VerifyEnabled)
2742 MF->verify(this, "Before post optimization");
2743 postOptimization();
2744 reportStats();
2745
2746 releaseMemory();
2747 return true;
2748}
unsigned SubReg
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:680
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:496
Rewrite Partial Register Uses
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
This file implements an indexed map.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg)
Return true if reg has any tied def operand.
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use, const TargetInstrInfo *TII)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
Greedy Register Allocator
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &MI, Register Reg)
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > EnableDeferredSpilling("enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " "code insertion to the end of the allocation. That way the " "allocator might still find a suitable coloring for this " "variable because of other evicted variables."), cl::init(false))
static cl::opt< unsigned > SplitThresholdForRegWithHint("split-threshold-for-reg-with-hint", cl::desc("The threshold for splitting a virtual register with a hint, in " "percentate"), cl::init(75), cl::Hidden)
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
SI optimize exec mask operations pre RA
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator end() const
Iterator begin() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & reset()
Definition: BitVector.h:392
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
A debug info location.
Definition: DebugLoc.h:33
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition: EdgeBundles.h:47
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
void init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, LiveIntervals *lis, const TargetRegisterInfo *tri)
init - Prepare cache for a new function.
unsigned getMaxCursors() const
getMaxCursors - Return the maximum number of concurrent cursors that can be supported.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void splitRegister(Register OldReg, ArrayRef< Register > NewRegs, LiveIntervals &LIS)
splitRegister - Move any user variables in OldReg to the live ranges in NewRegs where they are live.
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
SegmentIter find(SlotIndex x)
LiveSegments::iterator SegmentIter
A live range for subregisters.
Definition: LiveInterval.h:693
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
Register reg() const
Definition: LiveInterval.h:717
bool isSpillable() const
isSpillable - Can this interval be spilled?
Definition: LiveInterval.h:819
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
ArrayRef< SlotIndex > getRegMaskSlotsInBlock(unsigned MBBNum) const
Returns a sorted array of slot indices of all instructions with register mask operands in the basic b...
LiveInterval & getInterval(Register Reg)
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
bool empty() const
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool liveAt(SlotIndex index) const
Definition: LiveInterval.h:401
bool empty() const
Definition: LiveInterval.h:382
iterator end()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg=MCRegister::NoRegister)
Check for regmask interference only.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
LiveIntervalUnion::Query & query(const LiveRange &LR, MCRegister RegUnit)
Query a line of the assigned virtual register matrix directly.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_VirtReg
Virtual register interference.
Definition: LiveRegMatrix.h:90
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
LiveIntervalUnion * getLiveUnions()
Directly access the live interval unions per regunit.
iterator_range< MCRegUnitIterator > regunits(MCRegister Reg) const
Returns an iterator range over all regunits for Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1416
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const
uint64_t getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isImplicitDef() const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
Diagnostic information for missed-optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
iterator_range< def_iterator > def_operands(Register Reg) const
LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
void dump() const
Definition: Pass.cpp:136
void LRE_DidCloneVirtReg(Register New, Register Old)
Spiller & spiller() override
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
bool runOnMachineFunction(MachineFunction &mf) override
Perform register allocation.
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
RAGreedy(const RegClassFilterFunc F=allocateAllRegClasses)
static char ID
void getAnalysisUsage(AnalysisUsage &AU) const override
RAGreedy analysis usage.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase provides the register allocation driver and interface that can be extended to add intere...
Definition: RegAllocBase.h:61
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in De...
Definition: RegAllocBase.h:77
const TargetRegisterInfo * TRI
Definition: RegAllocBase.h:65
LiveIntervals * LIS
Definition: RegAllocBase.h:68
static const char TimerGroupName[]
Definition: RegAllocBase.h:115
static const char TimerGroupDescription[]
Definition: RegAllocBase.h:116
LiveRegMatrix * Matrix
Definition: RegAllocBase.h:69
virtual void postOptimization()
VirtRegMap * VRM
Definition: RegAllocBase.h:67
RegisterClassInfo RegClassInfo
Definition: RegAllocBase.h:70
MachineRegisterInfo * MRI
Definition: RegAllocBase.h:66
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
Definition: RegAllocBase.h:123
const RegClassFilterFunc ShouldAllocateClass
Definition: RegAllocBase.h:71
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet...
bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
unsigned getLastCostChange(const TargetRegisterClass *RC) const
Get the position of the last cost change in getOrder(RC).
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
uint8_t getMinCost(const TargetRegisterClass *RC) const
Get the minimum register cost in RC's allocation order.
MCRegister getLastCalleeSavedAlias(MCRegister PhysReg) const
getLastCalleeSavedAlias - Returns the last callee-saved register that overlaps PhysReg, or NoRegister if PhysReg does not overlap any callee-saved register.
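A sketch of how these queries might combine when judging a register class; worthSplitting() is hypothetical, RCI an initialized RegisterClassInfo:
// A class is a more promising split target when a legal super-class
// offers more allocatable registers, or when its allocation order
// begins with zero-cost registers.
bool worthSplitting(const RegisterClassInfo &RCI,
                    const TargetRegisterClass *RC) {
  if (RCI.getNumAllocatableRegs(RC) == 0)
    return false; // reserved-only class: nothing to allocate
  return RCI.isProperSubClass(RC) || RCI.getMinCost(RC) == 0;
}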
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
MCRegister asMCReg() const
Utility to check-convert this value to an MCRegister.
Definition: Register.h:110
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
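A brief sketch of the namespace checks this wrapper provides; describeReg() is hypothetical:
// A Register is either virtual or physical; check before converting,
// since asMCReg() is only meaningful for physical registers.
void describeReg(Register Reg) {
  if (Reg.isVirtual()) {
    // Query MachineRegisterInfo for class, hints, use/def chains, ...
  } else if (Reg.isPhysical()) {
    MCRegister PhysReg = Reg.asMCReg();
    (void)PhysReg; // query TargetRegisterInfo for aliases, sub-regs, ...
  }
}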
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:180
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:186
@ InstrDist
The default distance between instructions as returned by distance().
Definition: SlotIndexes.h:115
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:134
SlotIndex getBoundaryIndex() const
Returns the boundary index associated with this index.
Definition: SlotIndexes.h:235
SlotIndex getBaseIndex() const
Returns the base index associated with this index.
Definition: SlotIndexes.h:228
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction have zero distance.
Definition: SlotIndexes.h:207
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:241
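A sketch of these helpers in use; compareSlots() is hypothetical, A and B are valid SlotIndexes:
// One instruction owns several slots, so compare at instruction
// granularity before reasoning about gaps between uses.
void compareSlots(SlotIndex A, SlotIndex B) {
  bool Same = SlotIndex::isSameInstr(A, B);    // slots of one instruction?
  bool Earlier = SlotIndex::isEarlierInstr(A, B);
  SlotIndex Base = A.getBaseIndex();           // first slot of A's instr
  SlotIndex Bound = A.getBoundaryIndex();      // last slot of A's instr
  int Dist = A.getApproxInstrDistance(B);      // units of InstrDist
  (void)Same; (void)Earlier; (void)Base; (void)Bound; (void)Dist;
}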
SlotIndexes pass.
Definition: SlotIndexes.h:301
SlotIndex getLastIndex()
Returns the base index of the last slot in this analysis.
Definition: SlotIndexes.h:361
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:453
void packIndexes()
Renumber all indexes using the default instruction distance.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:355
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated with it.
Definition: SlotIndexes.h:390
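And a sketch of mapping indexes back to the program; inspect() is hypothetical, Indexes the SlotIndexes result:
void inspect(SlotIndexes &Indexes, SlotIndex Idx, unsigned BlockNum) {
  // Block boundaries and erased instructions map to null.
  if (MachineInstr *MI = Indexes.getInstructionFromIndex(Idx))
    (void)MI; // a real instruction occupies Idx
  SlotIndex Start = Indexes.getMBBStartIdx(BlockNum); // block entry index
  (void)Start;
}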
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_type size() const
Definition: SmallSet.h:161
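A short usage sketch; Reg, OtherReg, and process() are hypothetical:
// Deduplicate a small working set of registers without heap traffic.
SmallSet<Register, 8> Visited;
if (Visited.insert(Reg).second) // second is false if already present
  process(Reg);                 // act on first visit only
if (Visited.count(OtherReg))    // count() returns 1 or 0
  Visited.erase(OtherReg);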
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
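A usage sketch of the members listed above; SomeReg and Others are hypothetical:
// Stack-allocated until it outgrows the inline capacity (4 here),
// then transparently heap-backed.
SmallVector<Register, 4> Regs;
Regs.push_back(SomeReg);                   // append one element
Regs.append(Others.begin(), Others.end()); // splice in a range
Regs.resize(1);                            // truncate to the first entry
Regs.assign(3, Register());                // replace with three null regs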
void addConstraints(ArrayRef< BlockConstraint > LiveBlocks)
addConstraints - Add constraints and biases.
bool finish()
finish - Compute the optimal spill code placement given the constraints.
void addPrefSpill(ArrayRef< unsigned > Blocks, bool Strong)
addPrefSpill - Add PrefSpill constraints to all blocks listed.
void prepare(BitVector &RegBundles)
prepare - Reset state and prepare for a new spill placement computation.
bool scanActiveBundles()
scanActiveBundles - Perform an initial scan of all bundles activated by addConstraints and addLinks, updating their state.
void addLinks(ArrayRef< unsigned > Links)
addLinks - Add transparent blocks with the given numbers.
void iterate()
iterate - Update the network iteratively until convergence, or new bundles are found.
@ MustSpill
A register is impossible, variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
ArrayRef< unsigned > getRecentPositive()
getRecentPositive - Return an array of bundles that became positive during the previous call to scanActiveBundles or iterate.
BlockFrequency getBlockFrequency(unsigned Number) const
getBlockFrequency - Return the estimated block execution frequency per function invocation.
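These calls form a fixed protocol that the region-splitting code drives in roughly this order. A sketch under that assumption; SpillPlacer is the SpillPlacement analysis, and SplitConstraints/TransparentBlocks stand in for the real per-candidate state:
// Solve for the edge bundles where the candidate stays in a register.
BitVector LiveBundles;
SpillPlacer->prepare(LiveBundles);             // results land here
SpillPlacer->addConstraints(SplitConstraints); // entry/exit preferences
SpillPlacer->addLinks(TransparentBlocks);      // blocks with no uses
if (SpillPlacer->scanActiveBundles()) {
  // Bundles that just turned positive seed the region-growing loop.
  ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
  (void)NewBundles;
  SpillPlacer->iterate();                      // run to convergence
}
bool Perfect = SpillPlacer->finish();          // no constraints violated?
(void)Perfect;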
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition: SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition: SplitKit.h:257
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other intervals.
Definition: SplitKit.h:280
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition: SplitKit.h:292
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition: SplitKit.h:287
TargetInstrInfo - Interface to description of machine instruction set.
virtual unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct store to a stack slot, return the virtual or physical register number of the source register, along with the FrameIndex of the stack slot.
virtual std::pair< unsigned, unsigned > getPatchpointUnfoldableRange(const MachineInstr &MI) const
For a patchpoint, stackmap, or statepoint intrinsic, return the range of operands which can't be folded.
bool isFullCopyInstr(const MachineInstr &MI) const
virtual bool hasStoreToStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a store to a stack slot, return true along with the FrameIndices and machine memory operands of the accesses.
std::optional< DestSourcePair > isCopyInstr(const MachineInstr &MI) const
If the specified machine instruction is an instruction that moves/copies a value from one register to another, return the destination and source registers.
virtual bool hasLoadFromStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a load from a stack slot, return true along with the FrameIndices and machine memory operands of the accesses.
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physical register number of the destination register, along with the FrameIndex of the stack slot.
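A sketch of how these hooks classify an instruction; classify() is hypothetical:
void classify(const TargetInstrInfo &TII, const MachineInstr &MI) {
  int FI = 0;
  if (unsigned Reg = TII.isLoadFromStackSlot(MI, FI)) {
    (void)Reg; // reload: Reg is filled from stack slot FI
  } else if (unsigned Reg = TII.isStoreToStackSlot(MI, FI)) {
    (void)Reg; // spill: Reg is written to stack slot FI
  } else if (auto CopyOps = TII.isCopyInstr(MI)) {
    Register Dst = CopyOps->Destination->getReg();
    Register Src = CopyOps->Source->getReg();
    (void)Dst; (void)Src; // copy: candidate for hint propagation
  }
}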
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heuristic.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Deferred spilling delays the spill insertion of a virtual register until after every other allocation.
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Region splitting has a high compile-time cost, especially for large live ranges.
virtual bool shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Last chance recoloring has a high compile-time cost, especially for targets with many registers.
virtual unsigned getCSRFirstUseCost() const
Allow the target to override the cost of using a callee-saved register for the first time.
LaneBitmask getCoveringLanes() const
The lane masks returned by getSubRegIndexLaneMask() above can only be used to determine if sub-registers overlap.
ArrayRef< uint8_t > getRegisterCosts(const MachineFunction &MF) const
Get a list of cost values for all registers that correspond to the index returned by RegisterCostTableIndex.
virtual bool reverseLocalAssignment() const
Allow the target to reverse allocation order of local live ranges.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx for the physical register Reg.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
virtual bool regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const
When prioritizing live ranges in register allocation, if this hook returns true then the AllocationPriority of the register class is treated as more important than whether the range is global.
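A consumption-side sketch of these hooks; queryTargetHooks() is hypothetical:
void queryTargetHooks(const TargetRegisterInfo &TRI,
                      const MachineFunction &MF,
                      const TargetRegisterClass *RC) {
  unsigned CSRCost = TRI.getCSRFirstUseCost();  // 0 disables the penalty
  bool ClassFirst = TRI.regClassPriorityTrumpsGlobalness(MF);
  bool ReverseLocal = TRI.reverseLocalAssignment();
  ArrayRef<uint8_t> Costs = TRI.getRegisterCosts(MF);
  const char *Name = TRI.getRegClassName(RC);
  (void)CSRCost; (void)ClassFirst; (void)ReverseLocal;
  (void)Costs; (void)Name;
}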
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasKnownPreference(Register VirtReg) const
Returns true if VirtReg has a known preferred register.
Definition: VirtRegMap.cpp:118
MCRegister getPhys(Register virtReg) const
Returns the physical register mapped to the specified virtual register.
Definition: VirtRegMap.h:105
bool hasPhys(Register virtReg) const
Returns true if the specified virtual register is mapped to a physical register.
Definition: VirtRegMap.h:99
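A sketch of the mapping queries, with VRM an initialized VirtRegMap and VirtReg a virtual register:
// Check the current virtual -> physical mapping before assigning.
if (VRM->hasPhys(VirtReg)) {
  MCRegister PhysReg = VRM->getPhys(VirtReg);
  (void)PhysReg; // already mapped
}
if (VRM->hasKnownPreference(VirtReg)) {
  // Hinted ranges can be enqueued with a priority boost so the
  // preferred register is more likely to still be free.
}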
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:705
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized builds.
bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this boolean will be true, otherwise false.
char & RAGreedyID
Greedy register allocator.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring through VirtRegMap.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1741
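A sketch of the range-based helpers; scanIntervals() is hypothetical:
void scanIntervals(ArrayRef<const LiveInterval *> Intervals) {
  bool AnyUnspillable = any_of(Intervals, [](const LiveInterval *LI) {
    return !LI->isSpillable(); // weight is huge_valf
  });
  bool NoneEmpty = none_of(Intervals, [](const LiveInterval *LI) {
    return LI->empty();
  });
  for (const LiveInterval *LI : reverse(Intervals)) // back to front
    LLVM_DEBUG(dbgs() << *LI << '\n');
  (void)AnyUnspillable; (void)NoneEmpty;
}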
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
@ RS_Memory
Live range is in memory.
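The RS_* values form a one-way pipeline: a requeued range may only move to a later stage, which is what guarantees termination. A sketch of the dispatch this implies, with Stage the range's current LiveRangeStage:
switch (Stage) {
case RS_New:
case RS_Assign:
  // Try assignment and eviction; on failure requeue as RS_Split.
  break;
case RS_Split:
case RS_Split2:
  // Try region/local splitting; RS_Split2 must make progress.
  break;
case RS_Spill:
  // Spill the range (or defer); no further splitting is attempted.
  break;
case RS_Memory:
case RS_Done:
  // Nothing more can be done with this live range.
  break;
}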
const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
Definition: MathExtras.cpp:28
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1946
InstructionCost Cost
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
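A hedged usage sketch, assuming a LiveInterval LI and illustrative inputs: the normalization keeps small ranges' weights roughly proportional to use count, while large ranges trend toward use density.
// Illustrative values only; real weights come from a block-frequency-
// weighted walk over LI's uses and defs.
float UseDefFreq = 42.0f;  // summed, frequency-weighted uses/defs
unsigned NumInstr = 7;     // instructions counted in the range
float Weight = normalizeSpillWeight(UseDefFreq, LI.getSize(), NumInstr);
(void)Weight;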
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
std::function< bool(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC)> RegClassFilterFunc
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition: MathExtras.h:201
constexpr bool any() const
Definition: LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:163
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
Additional information about basic blocks where the current variable is live.
Definition: SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition: SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition: SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition: SplitKit.h:126
MachineBasicBlock * MBB
Definition: SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition: SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition: SplitKit.h:123