//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "RegAllocGreedy.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
    "split-spill-mode", cl::Hidden,
    cl::desc("Spill mode for splitting live ranges"),
    cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
               clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
               clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
    cl::init(SplitEditor::SM_Speed));

static cl::opt<unsigned> LastChanceRecoloringMaxDepth(
    "lcr-max-depth", cl::Hidden,
    cl::desc("Last chance recoloring max depth"),
    cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

static cl::opt<bool> ExhaustiveSearch(
    "exhaustive-register-search", cl::NotHidden,
    cl::desc("Exhaustive Search for registers bypassing the depth "
             "and interference cutoffs of last chance recoloring"),
    cl::Hidden);

static cl::opt<bool> EnableDeferredSpilling(
    "enable-deferred-spilling", cl::Hidden,
    cl::desc("Instead of spilling a variable right away, defer the actual "
             "code insertion to the end of the allocation. That way the "
             "allocator might still find a suitable coloring for this "
             "variable because of other evicted variables."),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
                 cl::desc("Cost for first time use of callee-saved register."),
                 cl::init(0), cl::Hidden);

static cl::opt<unsigned long> GrowRegionComplexityBudget(
    "grow-region-complexity-budget",
    cl::desc("growRegion() does not scale with the number of BB edges, so "
             "limit its budget and bail out once we reach the limit."),
    cl::init(10000), cl::Hidden);

static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
    "greedy-regclass-priority-trumps-globalness",
    cl::desc("Change the greedy register allocator's live range priority "
             "calculation to make the AllocationPriority of the register class "
             "more important than whether the range is global"),
    cl::Hidden);

static cl::opt<bool> GreedyReverseLocalAssignment(
    "greedy-reverse-local-assignment",
    cl::desc("Reverse allocation order of local live ranges, such that "
             "shorter local live ranges will tend to be allocated first"),
    cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

char RAGreedy::ID = 0;
char &llvm::RAGreedyID = RAGreedy::ID;

INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
INITIALIZE_PASS_END(RAGreedy, "greedy",
                "Greedy Register Allocator", false, false)

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Memory",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
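// (For intuition: comparisons below such as "EstWeight * Hysteresis >= MaxGap"
//  in tryLocalSplit only let a candidate win by a roughly 2% margin, so small
//  floating-point perturbations cannot flip an earlier decision.)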

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
  return new RAGreedy(Ftor);
}

RAGreedy::RAGreedy(RegClassFilterFunc F):
  MachineFunctionPass(ID),
  RegAllocBase(F) {
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  MachineFunctionPass::getAnalysisUsage(AU);
}

//===----------------------------------------------------------------------===//
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
  LiveInterval &LI = LIS->getInterval(VirtReg);
  if (VRM->hasPhys(VirtReg)) {
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  // Nonetheless, clear the live-range so that the debug
  // dump will show the right state for that VirtReg.
  LI.clear();
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
  ExtraInfo->LRE_DidCloneVirtReg(New, Old);
}

void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
  // Cloning a register we haven't even heard about yet? Just ignore it.
  if (!Info.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  // The clone inherits the same stage as the parent.
  Info[Old].Stage = RS_Assign;
  Info.grow(New.id());
  Info[New] = Info[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  GlobalCand.clear();
}

void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const Register Reg = LI->reg();
  assert(Reg.isVirtual() && "Can only enqueue virtual registers");

  auto Stage = ExtraInfo->getOrInitStage(Reg);
  if (Stage == RS_New) {
    Stage = RS_Assign;
    ExtraInfo->setStage(Reg, Stage);
  }

  unsigned Ret = PriorityAdvisor->getPriority(*LI);

  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
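  // (The priority queue pops the largest (Prio, ~Reg) pair first, so storing
  //  the complemented register number makes lower-numbered vregs compare
  //  greater; dequeue() below undoes the complement with ~CurQueue.top().)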
  CurQueue.push(std::make_pair(Ret, ~Reg));
}

unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
  const unsigned Size = LI.getSize();
  const Register Reg = LI.reg();
  unsigned Prio;
  LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);

  if (Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
    // TODO: Make this a member variable and probably do something about hints.
    static unsigned MemOp = 0;
    Prio = MemOp++;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = RC.GlobalPriority ||
                       (!ReverseLocalAssignment &&
                        (Size / SlotIndex::InstrDist) >
                            (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
    unsigned GlobalBit = 0;

    if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
        LIS->intervalIsInOneMBB(LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocalAssignment)
        Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short LRGs to be assigned first
        // to one of the cheap registers. This could be much faster for very
        // large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
      }
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference. Mark a bit to prioritize global above local ranges.
      Prio = Size;
      GlobalBit = 1;
    }

    // Priority bit layout:
    // 31 RS_Assign priority
    // 30 Preference priority
    // if (RegClassPriorityTrumpsGlobalness)
    //   29-25 AllocPriority
    //   24    GlobalBit
    // else
    //   29    Global bit
    //   28-24 AllocPriority
    // 0-23 Size/Instr distance

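    // Worked example (illustrative values): a global range of size 1000 in a
    // class with AllocationPriority 2, no hint, and
    // RegClassPriorityTrumpsGlobalness unset:
    //   Prio = 1000, GlobalBit = 1
    //   Prio |= 1 << 29 | 2 << 24   -> 0x220003E8
    //   Prio |= 1u << 31            -> 0xA20003E8
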
    // Clamp the size to fit with the priority masking scheme
    Prio = std::min(Prio, (unsigned)maxUIntN(24));
    assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");

    if (RegClassPriorityTrumpsGlobalness)
      Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
    else
      Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;

    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }

  return Prio;
}

const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}

//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
                               AllocationOrder &Order,
                               SmallVectorImpl<Register> &NewVRegs,
                               const SmallVirtRegSet &FixedRegisters) {
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    assert(*I);
    if (!Matrix->checkInterference(VirtReg, *I)) {
      if (I.isHint())
        return *I;
      else
        PhysReg = *I;
    }
  }
  if (!PhysReg.isValid())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
    if (Order.isHint(Hint)) {
      MCRegister PhysHint = Hint.asMCReg();
      LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');

      if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
                                                 FixedRegisters)) {
        evictInterference(VirtReg, PhysHint, NewVRegs);
        return PhysHint;
      }
      // Record the missed hint, we may be able to recover
      // at the end if the surrounding allocation changed.
      SetOfBrokenHints.insert(&VirtReg);
    }

  // Try to evict interference from a cheaper alternative.
  uint8_t Cost = RegCosts[PhysReg];

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
                    << (unsigned)Cost << '\n');
  MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
  return CheapReg ? CheapReg : PhysReg;
}

//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

MCRegister RAGreedy::canReassign(const LiveInterval &VirtReg,
                                 Register PrevReg) const {
  auto Order =
      AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
  MCRegister PhysReg;
  for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
    if ((*I).id() == PrevReg.id())
      continue;

    MCRegUnitIterator Units(*I, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      PhysReg = *I;
  }
  if (PhysReg)
    LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
                      << printReg(PrevReg, TRI) << " to "
                      << printReg(PhysReg, TRI) << '\n');
  return PhysReg;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(const LiveInterval &VirtReg,
                                 MCRegister PhysReg,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
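  // (Illustrative scenario: if %a is assigned cascade 3 and evicts %b, %b is
  //  tagged with cascade 3 as well. %b may later evict only ranges whose
  //  cascade number is lower than 3, so it can never evict %a back, and the
  //  pair cannot ping-pong forever.)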

  LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
                    << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<const LiveInterval *, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, since different SubRanges could have
    // been queried against it.
    const auto &IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (const LiveInterval *Intf : Intfs) {
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg()))
      continue;

    Matrix->unassign(*Intf);
    assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraInfo->setCascade(Intf->reg(), Cascade);
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg());
  }
}

/// Returns true if the given \p PhysReg is a callee saved register and has not
/// been used for allocation yet.
bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
  MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
  if (!CSR)
    return false;

  return !Matrix->isPhysRegUsed(PhysReg);
}
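
// (Context, not from this file: the first use of a callee-saved register such
//  as %rbx on x86-64 forces a save/restore pair in the function's prologue and
//  epilogue, which is why an unused CSR is treated as more expensive; see the
//  regalloc-csr-first-time-cost flag above.)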

std::optional<unsigned>
RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
                                       const AllocationOrder &Order,
                                       unsigned CostPerUseLimit) const {
  unsigned OrderLimit = Order.getOrder().size();

  if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
    uint8_t MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
                        << MinCost << ", no cheaper registers to be found.\n");
      return std::nullopt;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
                        << " regs.\n");
    }
  }
  return OrderLimit;
}

bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
                                                 MCRegister PhysReg) const {
  if (RegCosts[PhysReg] >= CostPerUseLimit)
    return false;
  // The first use of a callee-saved register in a function has cost 1.
  // Don't start using a CSR when the CostPerUseLimit is low.
  if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
    LLVM_DEBUG(
        dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
               << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
               << '\n');
    return false;
  }
  return true;
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order   Physregs to try.
/// @return        Physreg to assign VirtReg, or 0.
MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
                              AllocationOrder &Order,
                              SmallVectorImpl<Register> &NewVRegs,
                              uint8_t CostPerUseLimit,
                              const SmallVirtRegSet &FixedRegisters) {
  NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
                     TimePassesIsEnabled);

  MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
      VirtReg, Order, CostPerUseLimit, FixedRegisters);
  if (BestPhys.isValid())
    evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}

//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = (BI.LiveOut &&
               !SA->getParent().isKilledAtInstr(BI.LastInstr))
                  ? SpillPlacement::PrefReg
                  : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
        BC.Entry = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.first() < BI.FirstInstr) {
        BC.Entry = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.first() < BI.LastInstr) {
        ++Ins;
      }

      // Abort if the spill cannot be inserted at the MBB's start.
      if (((BC.Entry == SpillPlacement::MustSpill) ||
           (BC.Entry == SpillPlacement::PrefSpill)) &&
          SlotIndex::isEarlierInstr(BI.FirstInstr,
                                    SA->getFirstSplitPoint(BC.Number)))
        return false;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
        BC.Exit = SpillPlacement::MustSpill;
        ++Ins;
      } else if (Intf.last() > BI.LastInstr) {
        BC.Exit = SpillPlacement::PrefSpill;
        ++Ins;
      } else if (Intf.last() > BI.FirstInstr) {
        ++Ins;
      }
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}

/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;
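  // (BCS batches the constraints of interfering blocks and TBS batches the
  //  numbers of transparent blocks; both are flushed to SpillPlacer in groups
  //  of GroupSize to amortize the addConstraints/addLinks calls below.)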

  for (unsigned Number : Blocks) {
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(ArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Abort if the spill cannot be inserted at the MBB's start.
    MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
    auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
    if (FirstNonDebugInstr != MBB->end() &&
        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
                                  SA->getFirstSplitPoint(Number)))
      return false;
    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(ArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(ArrayRef(BCS, B));
  SpillPlacer->addLinks(ArrayRef(TBS, T));
  return true;
}

bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  unsigned long Budget = GrowRegionComplexityBudget;
  while (true) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (unsigned Bundle : NewBundles) {
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      // Limit compilation time by bailing out after we use all our budget.
      if (Blocks.size() >= Budget)
        return false;
      Budget -= Blocks.size();
      for (unsigned Block : Blocks) {
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg) {
      if (!addThroughConstraints(Cand.Intf, NewBlocks))
        return false;
    } else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  LLVM_DEBUG(dbgs() << ", v=" << Visited);
  return true;
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions. Compact
/// regions can be computed without looking at interference. They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, MCRegister::NoRegister);

  LLVM_DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  if (!growRegion(Cand)) {
    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
    return false;
  }

  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    LLVM_DEBUG(dbgs() << ", none.\n");
    return false;
  }

  LLVM_DEBUG({
    for (int I : Cand.LiveBundles.set_bits())
      dbgs() << " EB#" << I;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
                                             const AllocationOrder &Order) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned I = 0; I != UseBlocks.size(); ++I) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
    unsigned Ins = 0;

    Cand.Intf.moveToBlock(BC.Number);

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
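    // (Each mismatch between where the bundle solution wants the value, in a
    //  register vs. on the stack, and what the block boundary provides costs
    //  one spill or reload, charged at this block's frequency.)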
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned Number : Cand.ActiveBlocks) {
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
                    << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  Register Reg = SA->getParent().reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned UsedCand : UsedCands) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
    for (unsigned Number : Blocks) {
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));

    // Ignore old intervals from DCE.
    if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[I] == 0) {
      ExtraInfo->setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[I] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                          << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        ExtraInfo->setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    SmallVectorImpl<Register> &NewVRegs) {
  if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
    return MCRegister::NoRegister;
  unsigned NumCands = 0;
  BlockFrequency SpillCost = calcSpillCost();
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = SpillCost;
    LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
               MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                               NumCands, false /*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return MCRegister::NoRegister;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
      continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
        if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
          continue;
        unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
        if (Count < WorstCount) {
          Worst = CandIndex;
          WorstCount = Count;
        }
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
               MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      LLVM_DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    if (!growRegion(Cand)) {
      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
      continue;
    }

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      LLVM_DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand, Order);
    LLVM_DEBUG({
      dbgs() << ", total = ";
      MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
      for (int I : Cand.LiveBundles.set_bits())
        dbgs() << " EB#" << I;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }

  return BestCand;
}

unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<Register> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
                        << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      LLVM_DEBUG(dbgs() << "Split for compact region in " << B
                        << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}

//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  Register Reg = VirtReg.reg();
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
    const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
    if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
      ExtraInfo->setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}

//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
                                       const TargetRegisterInfo &TRI,
                                       const MachineInstr &MI, Register Reg) {
  LaneBitmask Mask;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.getReg() != Reg)
      continue;

    unsigned SubReg = MO.getSubReg();
    if (SubReg == 0 && MO.isUse()) {
      Mask |= MRI.getMaxLaneMaskForVReg(Reg);
      continue;
    }

    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
    if (MO.isDef()) {
      if (!MO.isUndef())
        Mask |= ~SubRegMask;
    } else
      Mask |= SubRegMask;
  }

  return Mask;
}
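
// (Reading getInstReadLaneMask's result: a full-register use contributes the
//  maximal lane mask for the vreg; a subregister use contributes only that
//  subreg's lanes; and a subregister def that is not marked undef also adds
//  the remaining lanes (~SubRegMask), since such a def behaves like a
//  read-modify-write of the wider register.)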

/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                            const MachineInstr *MI, const LiveInterval &VirtReg,
                            const TargetRegisterInfo *TRI, SlotIndex Use) {
  // Early check the common case.
  if (MI->isCopy() &&
      MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
    return false;

  // FIXME: We're only considering uses, but should we consider defs too?
  LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());

  LaneBitmask LiveAtMask;
  for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
    if (S.liveAt(Use))
      LiveAtMask |= S.LaneMask;
  }

  // If the live lanes aren't different from the lanes used by the instruction,
  // this doesn't help.
  return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
                                       AllocationOrder &Order,
                                       SmallVectorImpl<Register> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
  // There is no point to this if there are no larger sub-classes.

  bool SplitSubClass = true;
  if (!RegClassInfo.isProperSubClass(CurRC)) {
    if (!VirtReg.hasSubRanges())
      return 0;
    SplitSubClass = false;
  }

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
                    << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs =
      RegClassInfo.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (const SlotIndex Use : Uses) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
      if (MI->isFullCopy() ||
          (SplitSubClass &&
           SuperRCNumAllocatableRegs ==
               getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                   TII, TRI, RegClassInfo)) ||
          // TODO: Handle split for subranges with subclass constraints?
          (!SplitSubClass && VirtReg.hasSubRanges() &&
           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
        LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
        continue;
      }
    }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Use);
    SlotIndex SegStop = SE->leaveIntvAfter(Use);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    LLVM_DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // Assign all new registers to RS_Spill. This was the last chance.
  ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}

//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//

/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
///
void RAGreedy::calcGapWeights(MCRegister PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
      BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
      BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
             .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
        Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight();
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
                                 AllocationOrder &Order,
                                 SmallVectorImpl<Register> &NewVRegs) {
  // TODO: the function currently only handles a single UseBlock; it should be
  // possible to generalize.
  if (SA->getUseBlocks().size() != 1)
    return 0;

  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block - A phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case, we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  LLVM_DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (const auto &Use : Uses)
      dbgs() << ' ' << Use;
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned RI =
        llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
    unsigned RE = RMS.size();
    for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
      // Look for Uses[I] <= RMS <= Uses[I + 1].
      assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
      if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
        break;
      LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
                        << Uses[I + 1]);
      RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
        ++RI;
    }
    LLVM_DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
      SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
      (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  for (MCPhysReg PhysReg : Order) {
    assert(PhysReg);
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
        GapWeight[RegMaskGaps[I]] = huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    while (true) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
                        << '-' << Uses[SplitAfter] << " I=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        LLVM_DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps, we wouldn't be making progress.
        LLVM_DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            LLVM_DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          LLVM_DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
              MaxGap = std::max(MaxGap, GapWeight[I]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        LLVM_DEBUG(dbgs() << " end\n");
        break;
      }

      LLVM_DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
                    << Uses[BestAfter] << ", " << BestDiff << ", "
                    << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
      if (IntvMap[I] == 1) {
        ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
        LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
      }
    LLVM_DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<Register> &NewVRegs,
                            const SmallVirtRegSet &FixedRegisters) {
  // Ranges must be Split2 or less.
  if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
                       TimerGroupDescription, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
                     TimerGroupDescription, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
    MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

1736//===----------------------------------------------------------------------===//
1737// Last Chance Recoloring
1738//===----------------------------------------------------------------------===//
1739
1740/// Return true if \p reg has any tied def operand.
1741static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1742 for (const MachineOperand &MO : MRI->def_operands(reg))
1743 if (MO.isTied())
1744 return true;
1745
1746 return false;
1747}
1748
1749/// Return true if the existing assignment of \p Intf overlaps, but is not the
1750 /// same as, \p PhysReg.
1751 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1752 const VirtRegMap &VRM,
1753 MCRegister PhysReg,
1754 const LiveInterval &Intf) {
1755 MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1756 if (PhysReg == AssignedReg)
1757 return false;
1758 return TRI.regsOverlap(PhysReg, AssignedReg);
1759}
1760
1761/// mayRecolorAllInterferences - Check if the virtual registers that
1762/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1763/// recolored to free \p PhysReg.
1764/// When true is returned, \p RecoloringCandidates has been augmented with all
1765/// the live intervals that need to be recolored in order to free \p PhysReg
1766/// for \p VirtReg.
1767/// \p FixedRegisters contains all the virtual registers that cannot be
1768/// recolored.
1769bool RAGreedy::mayRecolorAllInterferences(
1770 MCRegister PhysReg, const LiveInterval &VirtReg,
1771 SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1772 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1773
1774 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1775 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
1776 // If there are LastChanceRecoloringMaxInterference or more interferences,
1777 // chances are one of them would not be recolorable.
1778 if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1779 LastChanceRecoloringMaxInterference &&
1780 !ExhaustiveSearch) {
1781 LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1782 CutOffInfo |= CO_Interf;
1783 return false;
1784 }
1785 for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
1786 // If Intf is done and sits on the same register class as VirtReg, it
1787 // would not be recolorable as it is in the same state as
1788 // VirtReg. However, there are at least two exceptions.
1789 //
1790 // If VirtReg has tied defs and Intf doesn't, then
1791 // there is still a point in examining whether it can be recolored.
1792 //
1793 // Additionally, if the register class has overlapping tuple members, it
1794 // may still be recolorable using a different tuple. This is more likely
1795 // if the existing assignment aliases with the candidate.
1796 //
1797 if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1798 MRI->getRegClass(Intf->reg()) == CurRC &&
1799 !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1800 !(hasTiedDef(MRI, VirtReg.reg()) &&
1801 !hasTiedDef(MRI, Intf->reg()))) ||
1802 FixedRegisters.count(Intf->reg())) {
1803 LLVM_DEBUG(
1804 dbgs() << "Early abort: the interference is not recolorable.\n");
1805 return false;
1806 }
1807 RecoloringCandidates.insert(Intf);
1808 }
1809 }
1810 return true;
1811}
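
// [Editorial sketch, not part of RegAllocGreedy.cpp; toy types.] A
// simplified model of the filter above: an interfering vreg blocks
// recoloring when it is already fixed in this session, or when it is
// RS_Done in the same register class without a partially overlapping
// assignment, unless VirtReg has tied defs and the interference does not.
struct ToyInterference {
  bool Done, SameClass, PartialOverlap, HasTiedDef, Fixed;
};
static bool blocksRecoloring(const ToyInterference &Intf,
                             bool VirtRegHasTiedDef) {
  if (Intf.Fixed)
    return true;
  return Intf.Done && Intf.SameClass && !Intf.PartialOverlap &&
         !(VirtRegHasTiedDef && !Intf.HasTiedDef);
}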
1812
1813/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1814/// its interferences.
1815/// Last chance recoloring chooses a color for \p VirtReg and recolors every
1816/// virtual register that was using it. The recoloring process may recursively
1817/// use the last chance recoloring. Therefore, when a virtual register has been
1818/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1819/// be last-chance-recolored again during this recoloring "session".
1820/// E.g.,
1821/// Let
1822/// vA can use {R1, R2 }
1823/// vB can use { R2, R3}
1824/// vC can use {R1 }
1825/// Where vA, vB, and vC cannot be split anymore (they are reloads for
1826/// instance) and they all interfere.
1827///
1828/// vA is assigned R1
1829/// vB is assigned R2
1830/// vC tries to evict vA but vA is already done.
1831/// Regular register allocation fails.
1832///
1833/// Last chance recoloring kicks in:
1834 /// vC pretends vA was evicted => vC uses R1.
1835/// vC is marked as fixed.
1836/// vA needs to find a color.
1837/// None are available.
1838/// vA cannot evict vC: vC is a fixed virtual register now.
1839 /// vA pretends vB was evicted => vA uses R2.
1840/// vB needs to find a color.
1841/// R3 is available.
1842/// Recoloring => vC = R1, vA = R2, vB = R3
1843///
1844/// \p Order defines the preferred allocation order for \p VirtReg.
1845 /// \p NewRegs will contain any new virtual registers that have been created
1846/// (split, spill) during the process and that must be assigned.
1847/// \p FixedRegisters contains all the virtual registers that cannot be
1848/// recolored.
1849///
1850/// \p RecolorStack tracks the original assignments of successfully recolored
1851/// registers.
1852///
1853/// \p Depth gives the current depth of the last chance recoloring.
1854/// \return a physical register that can be used for VirtReg or ~0u if none
1855/// exists.
1856unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1857 AllocationOrder &Order,
1858 SmallVectorImpl<Register> &NewVRegs,
1859 SmallVirtRegSet &FixedRegisters,
1860 RecoloringStack &RecolorStack,
1861 unsigned Depth) {
1862 if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1863 return ~0u;
1864
1865 LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1866
1867 const ssize_t EntryStackSize = RecolorStack.size();
1868
1869 // Ranges must be Done.
1870 assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1871 "Last chance recoloring should really be last chance");
1872 // Set the max depth to LastChanceRecoloringMaxDepth.
1873 // We may want to reconsider that if we end up with too large a search space
1874 // for targets with hundreds of registers.
1875 // Indeed, in that case we may want to cut the search space earlier.
1876 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1877 LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1878 CutOffInfo |= CO_Depth;
1879 return ~0u;
1880 }
1881
1882 // Set of Live intervals that will need to be recolored.
1883 SmallLISet RecoloringCandidates;
1884
1885 // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1886 // this recoloring "session".
1887 assert(!FixedRegisters.count(VirtReg.reg()));
1888 FixedRegisters.insert(VirtReg.reg());
1889 SmallVector<Register, 4> CurrentNewVRegs;
1890
1891 for (MCRegister PhysReg : Order) {
1892 assert(PhysReg.isValid());
1893 LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1894 << printReg(PhysReg, TRI) << '\n');
1895 RecoloringCandidates.clear();
1896 CurrentNewVRegs.clear();
1897
1898 // It is only possible to recolor virtual register interference.
1899 if (Matrix->checkInterference(VirtReg, PhysReg) >
1900 LiveRegMatrix::IK_VirtReg) {
1901 LLVM_DEBUG(
1902 dbgs() << "Some interferences are not with virtual registers.\n");
1903
1904 continue;
1905 }
1906
1907 // Give up early on this PhysReg if it is obvious we cannot recolor all
1908 // the interferences.
1909 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
1910 FixedRegisters)) {
1911 LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
1912 continue;
1913 }
1914
1915 // RecoloringCandidates contains all the virtual registers that interfere
1916 // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
1917 // recoloring and perform the actual recoloring.
1918 PQueue RecoloringQueue;
1919 for (const LiveInterval *RC : RecoloringCandidates) {
1920 Register ItVirtReg = RC->reg();
1921 enqueue(RecoloringQueue, RC);
1922 assert(VRM->hasPhys(ItVirtReg) &&
1923 "Interferences are supposed to be with allocated variables");
1924
1925 // Record the current allocation.
1926 RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
1927
1928 // Unassign the interval from the live register matrix.
1929 Matrix->unassign(*RC);
1930 }
1931
1932 // Do as if VirtReg was assigned to PhysReg so that the underlying
1933 // recoloring has the right information about the interferences and
1934 // available colors.
1935 Matrix->assign(VirtReg, PhysReg);
1936
1937 // Save the current recoloring state.
1938 // If we cannot recolor all the interferences, we will have to start again
1939 // at this point for the next physical register.
1940 SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
1941 if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
1942 FixedRegisters, RecolorStack, Depth)) {
1943 // Push the queued vregs into the main queue.
1944 for (Register NewVReg : CurrentNewVRegs)
1945 NewVRegs.push_back(NewVReg);
1946 // Do not mess with the global assignment process.
1947 // I.e., VirtReg must be unassigned.
1948 Matrix->unassign(VirtReg);
1949 return PhysReg;
1950 }
1951
1952 LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
1953 << printReg(PhysReg, TRI) << '\n');
1954
1955 // The recoloring attempt failed, undo the changes.
1956 FixedRegisters = SaveFixedRegisters;
1957 Matrix->unassign(VirtReg);
1958
1959 // For a newly created vreg which is also in RecoloringCandidates,
1960 // don't add it to NewVRegs because its physical register will be restored
1961 // below. Other vregs in CurrentNewVRegs are created by calling
1962 // selectOrSplit and should be added into NewVRegs.
1963 for (Register R : CurrentNewVRegs) {
1964 if (RecoloringCandidates.count(&LIS->getInterval(R)))
1965 continue;
1966 NewVRegs.push_back(R);
1967 }
1968
1969 // Roll back our unsuccessful recoloring. Also roll back any successful
1970 // recolorings in any recursive recoloring attempts, since it's possible
1971 // they would have introduced conflicts with assignments we will be
1972 // restoring further up the stack. Perform all unassignments prior to
1973 // reassigning, since sub-recolorings may have conflicted with the registers
1974 // we are going to restore to their original assignments.
1975 for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
1976 const LiveInterval *LI;
1977 MCRegister PhysReg;
1978 std::tie(LI, PhysReg) = RecolorStack[I];
1979
1980 if (VRM->hasPhys(LI->reg()))
1981 Matrix->unassign(*LI);
1982 }
1983
1984 for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
1985 const LiveInterval *LI;
1986 MCRegister PhysReg;
1987 std::tie(LI, PhysReg) = RecolorStack[I];
1988 if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
1989 Matrix->assign(*LI, PhysReg);
1990 }
1991
1992 // Pop the stack of recoloring attempts.
1993 RecolorStack.resize(EntryStackSize);
1994 }
1995
1996 // Last chance recoloring did not work either, give up.
1997 return ~0u;
1998}
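
// [Editorial sketch, not part of RegAllocGreedy.cpp; plain std containers
// stand in for VirtRegMap/LiveRegMatrix.] The two-phase rollback above in
// miniature: unassign everything recolored since entry before restoring any
// recorded assignment, so a not-yet-undone recoloring can never conflict
// with a color being restored.
#include <map>
#include <utility>
#include <vector>
static void rollBackSketch(std::map<int, int> &Assignment, // vreg -> physreg
                           std::vector<std::pair<int, int>> &Stack,
                           size_t EntrySize) {
  for (size_t I = Stack.size(); I > EntrySize; --I)
    Assignment.erase(Stack[I - 1].first); // phase 1: unassign, newest first
  for (size_t I = EntrySize; I != Stack.size(); ++I)
    Assignment[Stack[I].first] = Stack[I].second; // phase 2: restore colors
  Stack.resize(EntrySize); // pop this attempt's records
}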
1999
2000/// tryRecoloringCandidates - Try to assign a new color to every register
2001 /// in \p RecoloringQueue.
2002/// \p NewRegs will contain any new virtual register created during the
2003/// recoloring process.
2004/// \p FixedRegisters[in/out] contains all the registers that have been
2005/// recolored.
2006/// \return true if all virtual registers in RecoloringQueue were successfully
2007/// recolored, false otherwise.
2008bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2009 SmallVectorImpl<Register> &NewVRegs,
2010 SmallVirtRegSet &FixedRegisters,
2011 RecoloringStack &RecolorStack,
2012 unsigned Depth) {
2013 while (!RecoloringQueue.empty()) {
2014 const LiveInterval *LI = dequeue(RecoloringQueue);
2015 LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2016 MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2017 RecolorStack, Depth + 1);
2018 // When splitting happens, the live-range may actually be empty.
2019 // In that case, it is okay to continue the recoloring even
2020 // if we did not find an alternative color for it. Indeed,
2021 // there will not be anything to color for LI in the end.
2022 if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2023 return false;
2024
2025 if (!PhysReg) {
2026 assert(LI->empty() && "Only empty live-ranges do not require a register");
2027 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2028 << " succeeded. Empty LI.\n");
2029 continue;
2030 }
2031 LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2032 << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2033
2034 Matrix->assign(*LI, PhysReg);
2035 FixedRegisters.insert(LI->reg());
2036 }
2037 return true;
2038}
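
// [Editorial note, not part of RegAllocGreedy.cpp.] The loop above
// distinguishes three outcomes per dequeued interval:
//   ~0u              -> hard failure, abort this recoloring attempt;
//   0 with LI empty  -> nothing left to color (fully split away), go on;
//   a valid physreg  -> assign it and fix LI for the rest of the session.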
2039
2040//===----------------------------------------------------------------------===//
2041// Main Entry Point
2042//===----------------------------------------------------------------------===//
2043
2044 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2045 SmallVectorImpl<Register> &NewVRegs) {
2046 CutOffInfo = CO_None;
2047 LLVMContext &Ctx = MF->getFunction().getContext();
2048 SmallVirtRegSet FixedRegisters;
2049 RecoloringStack RecolorStack;
2050 MCRegister Reg =
2051 selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2052 if (Reg == ~0U && (CutOffInfo != CO_None)) {
2053 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2054 if (CutOffEncountered == CO_Depth)
2055 Ctx.emitError("register allocation failed: maximum depth for recoloring "
2056 "reached. Use -fexhaustive-register-search to skip "
2057 "cutoffs");
2058 else if (CutOffEncountered == CO_Interf)
2059 Ctx.emitError("register allocation failed: maximum interference for "
2060 "recoloring reached. Use -fexhaustive-register-search "
2061 "to skip cutoffs");
2062 else if (CutOffEncountered == (CO_Depth | CO_Interf))
2063 Ctx.emitError("register allocation failed: maximum interference and "
2064 "depth for recoloring reached. Use "
2065 "-fexhaustive-register-search to skip cutoffs");
2066 }
2067 return Reg;
2068}
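
// [Editorial sketch, not part of RegAllocGreedy.cpp; toy enum mirroring the
// CO_* flags above.] The depth and interference cut-offs are independent
// bits, so a single failed allocation may trip both, which is why three
// distinct error messages exist.
enum ToyCutOff : unsigned char { TCO_None = 0, TCO_Depth = 1, TCO_Interf = 2 };
static const char *cutOffMessage(unsigned char Info) {
  switch (Info & (TCO_Depth | TCO_Interf)) {
  case TCO_Depth:              return "max recoloring depth reached";
  case TCO_Interf:             return "max recoloring interference reached";
  case TCO_Depth | TCO_Interf: return "both cut-offs reached";
  default:                     return "no cut-off";
  }
}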
2069
2070/// Using a CSR for the first time has a cost because it causes push|pop
2071 /// to be added to the prologue|epilogue. Splitting a cold section of the
2072 /// live range can have lower cost than using the CSR for the first time;
2073 /// spilling a live range in the cold path can likewise have lower cost than
2074 /// using the CSR for the first time. Returns the physical register if we
2075 /// decide to use the CSR; otherwise returns 0.
2076MCRegister RAGreedy::tryAssignCSRFirstTime(
2077 const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2078 uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2079 if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2080 // We choose spill over using the CSR for the first time if the spill cost
2081 // is lower than CSRCost.
2082 SA->analyze(&VirtReg);
2083 if (calcSpillCost() >= CSRCost)
2084 return PhysReg;
2085
2086 // We are going to spill, set CostPerUseLimit to 1 to make sure that
2087 // we will not use a callee-saved register in tryEvict.
2088 CostPerUseLimit = 1;
2089 return 0;
2090 }
2091 if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2092 // We choose pre-splitting over using the CSR for the first time if
2093 // the cost of splitting is lower than CSRCost.
2094 SA->analyze(&VirtReg);
2095 unsigned NumCands = 0;
2096 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2097 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2098 NumCands, true /*IgnoreCSR*/);
2099 if (BestCand == NoCand)
2100 // Use the CSR if we can't find a region split below CSRCost.
2101 return PhysReg;
2102
2103 // Perform the actual pre-splitting.
2104 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2105 return 0;
2106 }
2107 return PhysReg;
2108}
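
// [Editorial sketch, not part of RegAllocGreedy.cpp; plain doubles stand in
// for BlockFrequency.] The comparison made at either stage above: the CSR
// is used only when the alternative (spilling the cold part, or
// pre-splitting the region) costs at least CSRCost.
static bool preferCSRSketch(double AlternativeCost, double CSRCost) {
  return AlternativeCost >= CSRCost;
}
// E.g. with CSRCost = 5.0, a region split costing 3.0 is chosen over the
// CSR, while one costing 7.0 loses and the CSR is assigned after all.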
2109
2110 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2111 // Do not keep invalid information around.
2112 SetOfBrokenHints.remove(&LI);
2113}
2114
2115void RAGreedy::initializeCSRCost() {
2116 // We use the larger one out of the command-line option and the value
2117 // reported by TRI.
2118 CSRCost = BlockFrequency(
2119 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2120 if (!CSRCost.getFrequency())
2121 return;
2122
2123 // Raw cost is relative to Entry == 2^14; scale it appropriately.
2124 uint64_t ActualEntry = MBFI->getEntryFreq();
2125 if (!ActualEntry) {
2126 CSRCost = 0;
2127 return;
2128 }
2129 uint64_t FixedEntry = 1 << 14;
2130 if (ActualEntry < FixedEntry)
2131 CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2132 else if (ActualEntry <= UINT32_MAX)
2133 // Invert the fraction and divide.
2134 CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2135 else
2136 // Can't use BranchProbability in general, since it takes 32-bit numbers.
2137 CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2138}
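
// [Editorial sketch, not part of RegAllocGreedy.cpp; plain integers stand in
// for BlockFrequency/BranchProbability.] Both branches above amount, up to
// rounding, to rescaling the raw cost by ActualEntry / 2^14, since the raw
// cost is expressed relative to a fixed entry frequency of 2^14.
static uint64_t scaleCSRCostSketch(uint64_t RawCost, uint64_t ActualEntry) {
  const uint64_t FixedEntry = 1 << 14;
  return RawCost * ActualEntry / FixedEntry;
}
// E.g. scaleCSRCostSketch(100, 1 << 13) == 50 (entry runs half as often as
// the reference), while scaleCSRCostSketch(100, 1 << 15) == 200.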
2139
2140/// Collect the hint info for \p Reg.
2141/// The results are stored into \p Out.
2142/// \p Out is not cleared before being populated.
2143void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2144 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2145 if (!Instr.isFullCopy())
2146 continue;
2147 // Look for the other end of the copy.
2148 Register OtherReg = Instr.getOperand(0).getReg();
2149 if (OtherReg == Reg) {
2150 OtherReg = Instr.getOperand(1).getReg();
2151 if (OtherReg == Reg)
2152 continue;
2153 }
2154 // Get the current assignment.
2155 MCRegister OtherPhysReg =
2156 OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2157 // Push the collected information.
2158 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2159 OtherPhysReg));
2160 }
2161}
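
// [Editorial note, not part of RegAllocGreedy.cpp; hypothetical copies.]
// What the loop above collects: for a full copy "%a = COPY $r5" in a block
// with frequency F, the entry (F, $r5, $r5) is appended; for "%a = COPY %b"
// with %b currently assigned $r6, the entry is (F, %b, $r6).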
2162
2163/// Using the given \p List, compute the cost of the broken hints if
2164/// \p PhysReg was used.
2165/// \return The cost of \p List for \p PhysReg.
2166BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2167 MCRegister PhysReg) {
2168 BlockFrequency Cost = 0;
2169 for (const HintInfo &Info : List) {
2170 if (Info.PhysReg != PhysReg)
2171 Cost += Info.Freq;
2172 }
2173 return Cost;
2174}
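
// [Editorial sketch, not part of RegAllocGreedy.cpp; toy types and
// hypothetical frequencies.] The profitability test this feeds in
// tryHintRecoloring below, in miniature: with hints {R1:8, R2:2}, keeping
// the current color R2 leaves a broken-hint cost of 8, while recoloring to
// R1 leaves only 2, so the recoloring is considered profitable.
struct ToyHint { unsigned PhysReg; unsigned long long Freq; };
static unsigned long long brokenHintFreqSketch(const ToyHint Hints[],
                                               unsigned N, unsigned PhysReg) {
  unsigned long long Cost = 0;
  for (unsigned I = 0; I != N; ++I)
    if (Hints[I].PhysReg != PhysReg)
      Cost += Hints[I].Freq; // this copy stays non-identity under PhysReg
  return Cost;
}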
2175
2176/// Using the register assigned to \p VirtReg, try to recolor
2177/// all the live ranges that are copy-related with \p VirtReg.
2178/// The recoloring is then propagated to all the live-ranges that have
2179/// been recolored and so on, until no more copies can be coalesced or
2180/// it is not profitable.
2181/// For a given live range, profitability is determined by the sum of the
2182/// frequencies of the non-identity copies it would introduce with the old
2183/// and new register.
2184void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2185 // We have a broken hint, check if it is possible to fix it by
2186 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2187 // some register and PhysReg may be available for the other live-ranges.
2188 SmallSet<Register, 4> Visited;
2189 SmallVector<unsigned, 2> RecoloringCandidates;
2190 HintsInfo Info;
2191 Register Reg = VirtReg.reg();
2192 MCRegister PhysReg = VRM->getPhys(Reg);
2193 // Start the recoloring algorithm from the input live-interval, then
2194 // it will propagate to the ones that are copy-related with it.
2195 Visited.insert(Reg);
2196 RecoloringCandidates.push_back(Reg);
2197
2198 LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2199 << '(' << printReg(PhysReg, TRI) << ")\n");
2200
2201 do {
2202 Reg = RecoloringCandidates.pop_back_val();
2203
2204 // We cannot recolor physical registers.
2205 if (Reg.isPhysical())
2206 continue;
2207
2208 // This may be a skipped class
2209 if (!VRM->hasPhys(Reg)) {
2211 "We have an unallocated variable which should have been handled");
2212 continue;
2213 }
2214
2215 // Get the live interval mapped with this virtual register to be able
2216 // to check for the interference with the new color.
2217 LiveInterval &LI = LIS->getInterval(Reg);
2218 MCRegister CurrPhys = VRM->getPhys(Reg);
2219 // Check that the new color matches the register class constraints and
2220 // that it is free for this live range.
2221 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2222 Matrix->checkInterference(LI, PhysReg)))
2223 continue;
2224
2225 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2226 << ") is recolorable.\n");
2227
2228 // Gather the hint info.
2229 Info.clear();
2230 collectHintInfo(Reg, Info);
2231 // Check if recoloring the live-range will increase the cost of the
2232 // non-identity copies.
2233 if (CurrPhys != PhysReg) {
2234 LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2235 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2236 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2237 LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2238 << "\nNew Cost: " << NewCopiesCost.getFrequency()
2239 << '\n');
2240 if (OldCopiesCost < NewCopiesCost) {
2241 LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2242 continue;
2243 }
2244 // At this point, the new cost is lower or equal. If it is
2245 // equal, we consider this profitable because it may expose
2246 // more recoloring opportunities.
2247 LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2248 // Recolor the live-range.
2249 Matrix->unassign(LI);
2250 Matrix->assign(LI, PhysReg);
2251 }
2252 // Push all copy-related live-ranges to keep reconciling the broken
2253 // hints.
2254 for (const HintInfo &HI : Info) {
2255 if (Visited.insert(HI.Reg).second)
2256 RecoloringCandidates.push_back(HI.Reg);
2257 }
2258 } while (!RecoloringCandidates.empty());
2259}
2260
2261/// Try to recolor broken hints.
2262/// Broken hints may be repaired by recoloring when an evicted variable
2263/// freed up a register for a larger live-range.
2264/// Consider the following example:
2265/// BB1:
2266/// a =
2267/// b =
2268/// BB2:
2269/// ...
2270/// = b
2271/// = a
2272/// Let us assume b gets split:
2273/// BB1:
2274/// a =
2275/// b =
2276/// BB2:
2277/// c = b
2278/// ...
2279/// d = c
2280/// = d
2281/// = a
2282 /// Because of how the allocation works, b, c, and d may be assigned different
2283/// colors. Now, if a gets evicted later:
2284/// BB1:
2285/// a =
2286/// st a, SpillSlot
2287/// b =
2288/// BB2:
2289/// c = b
2290/// ...
2291/// d = c
2292/// = d
2293/// e = ld SpillSlot
2294/// = e
2295 /// It is likely that we can assign the same register to b, c, and d,
2296/// getting rid of 2 copies.
2297void RAGreedy::tryHintsRecoloring() {
2298 for (const LiveInterval *LI : SetOfBrokenHints) {
2299 assert(LI->reg().isVirtual() &&
2300 "Recoloring is possible only for virtual registers");
2301 // Some dead defs may be around (e.g., because of debug uses).
2302 // Ignore those.
2303 if (!VRM->hasPhys(LI->reg()))
2304 continue;
2305 tryHintRecoloring(*LI);
2306 }
2307}
2308
2309MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2310 SmallVectorImpl<Register> &NewVRegs,
2311 SmallVirtRegSet &FixedRegisters,
2312 RecoloringStack &RecolorStack,
2313 unsigned Depth) {
2314 uint8_t CostPerUseLimit = uint8_t(~0u);
2315 // First try assigning a free register.
2316 auto Order =
2317 AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2318 if (MCRegister PhysReg =
2319 tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2320 // When NewVRegs is not empty, we may have made decisions such as evicting
2321 // a virtual register; go with the earlier decisions and use the physical
2322 // register.
2323 if (CSRCost.getFrequency() &&
2324 EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2325 MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2326 CostPerUseLimit, NewVRegs);
2327 if (CSRReg || !NewVRegs.empty())
2328 // Return now if we decide to use a CSR or create new vregs due to
2329 // pre-splitting.
2330 return CSRReg;
2331 } else
2332 return PhysReg;
2333 }
2334
2335 LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2336 LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2337 << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2338
2339 // Try to evict a less worthy live range, but only for ranges from the primary
2340 // queue. The RS_Split ranges already failed to do this, and they should not
2341 // get a second chance until they have been split.
2342 if (Stage != RS_Split)
2343 if (Register PhysReg =
2344 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2345 FixedRegisters)) {
2346 Register Hint = MRI->getSimpleHint(VirtReg.reg());
2347 // If VirtReg has a hint and that hint is broken record this
2348 // virtual register as a recoloring candidate for broken hint.
2349 // Indeed, since we evicted a variable in its neighborhood it is
2350 // likely we can at least partially recolor some of the
2351 // copy-related live-ranges.
2352 if (Hint && Hint != PhysReg)
2353 SetOfBrokenHints.insert(&VirtReg);
2354 return PhysReg;
2355 }
2356
2357 assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2358
2359 // The first time we see a live range, don't try to split or spill.
2360 // Wait until the second time, when all smaller ranges have been allocated.
2361 // This gives a better picture of the interference to split around.
2362 if (Stage < RS_Split) {
2363 ExtraInfo->setStage(VirtReg, RS_Split);
2364 LLVM_DEBUG(dbgs() << "wait for second round\n");
2365 NewVRegs.push_back(VirtReg.reg());
2366 return 0;
2367 }
2368
2369 if (Stage < RS_Spill) {
2370 // Try splitting VirtReg or interferences.
2371 unsigned NewVRegSizeBefore = NewVRegs.size();
2372 Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2373 if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2374 return PhysReg;
2375 }
2376
2377 // If we couldn't allocate a register from spilling, there is probably some
2378 // invalid inline assembly. The base class will report it.
2379 if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2380 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2381 RecolorStack, Depth);
2382 }
2383
2384 // Finally spill VirtReg itself.
2385 if ((EnableDeferredSpilling ||
2386 TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2387 ExtraInfo->getStage(VirtReg) < RS_Memory) {
2388 // TODO: This is experimental; in particular, we do not correctly model
2389 // the live range splitting done by spilling.
2390 // We would need a deep integration with the spiller to do the
2391 // right thing here. Anyway, that is still good for early testing.
2392 ExtraInfo->setStage(VirtReg, RS_Memory);
2393 LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2394 NewVRegs.push_back(VirtReg.reg());
2395 } else {
2396 NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2397 TimerGroupDescription, TimePassesIsEnabled);
2398 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2399 spiller().spill(LRE);
2400 ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2401
2402 // Tell LiveDebugVariables about the new ranges. Ranges not covered by the
2403 // new regs are kept in LDV (still mapping to the old register), until
2404 // we rewrite spilled locations in LDV at a later stage.
2405 DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2406
2407 if (VerifyEnabled)
2408 MF->verify(this, "After spilling");
2409 }
2410
2411 // The live virtual register requesting allocation was spilled, so tell
2412 // the caller not to allocate anything during this round.
2413 return 0;
2414}
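
// [Editorial sketch, not part of RegAllocGreedy.cpp; a deliberately
// simplified ladder, the real one is LiveRangeStage with RS_Split2 and
// RS_Memory refinements.] How the driver above escalates a range that keeps
// failing: assign, then evict, then split, then spill, with last chance
// recoloring as the final resort for RS_Done or unspillable ranges.
enum ToyStage { TS_Assign, TS_Split, TS_Spill, TS_Done };
static ToyStage nextStageOnFailure(ToyStage S) {
  switch (S) {
  case TS_Assign: return TS_Split; // wait a round, then allow splitting
  case TS_Split:  return TS_Spill; // splitting exhausted, spill next
  default:        return TS_Done;  // spilled (or unspillable): terminal
  }
}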
2415
2416void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2417 using namespace ore;
2418 if (Spills) {
2419 R << NV("NumSpills", Spills) << " spills ";
2420 R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2421 }
2422 if (FoldedSpills) {
2423 R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2424 R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2425 << " total folded spills cost ";
2426 }
2427 if (Reloads) {
2428 R << NV("NumReloads", Reloads) << " reloads ";
2429 R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2430 }
2431 if (FoldedReloads) {
2432 R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2433 R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2434 << " total folded reloads cost ";
2435 }
2436 if (ZeroCostFoldedReloads)
2437 R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2438 << " zero cost folded reloads ";
2439 if (Copies) {
2440 R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2441 R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2442 }
2443}
2444
2445RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2446 RAGreedyStats Stats;
2447 const MachineFrameInfo &MFI = MF->getFrameInfo();
2448 int FI;
2449
2450 auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2451 return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2452 A->getPseudoValue())->getFrameIndex());
2453 };
2454 auto isPatchpointInstr = [](const MachineInstr &MI) {
2455 return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2456 MI.getOpcode() == TargetOpcode::STACKMAP ||
2457 MI.getOpcode() == TargetOpcode::STATEPOINT;
2458 };
2459 for (MachineInstr &MI : MBB) {
2460 if (MI.isCopy()) {
2461 const MachineOperand &Dest = MI.getOperand(0);
2462 const MachineOperand &Src = MI.getOperand(1);
2463 Register SrcReg = Src.getReg();
2464 Register DestReg = Dest.getReg();
2465 // Only count `COPY`s with a virtual register as source or destination.
2466 if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2467 if (SrcReg.isVirtual()) {
2468 SrcReg = VRM->getPhys(SrcReg);
2469 if (Src.getSubReg())
2470 SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2471 }
2472 if (DestReg.isVirtual()) {
2473 DestReg = VRM->getPhys(DestReg);
2474 if (Dest.getSubReg())
2475 DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2476 }
2477 if (SrcReg != DestReg)
2478 ++Stats.Copies;
2479 }
2480 continue;
2481 }
2482
2483 SmallVector<const MachineMemOperand *, 2> Accesses;
2484 if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2485 ++Stats.Reloads;
2486 continue;
2487 }
2488 if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2489 ++Stats.Spills;
2490 continue;
2491 }
2492 if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2493 llvm::any_of(Accesses, isSpillSlotAccess)) {
2494 if (!isPatchpointInstr(MI)) {
2495 Stats.FoldedReloads += Accesses.size();
2496 continue;
2497 }
2498 // For statepoints there may be folded and zero-cost folded stack reloads.
2499 std::pair<unsigned, unsigned> NonZeroCostRange =
2500 TII->getPatchpointUnfoldableRange(MI);
2501 SmallSet<unsigned, 16> FoldedReloads;
2502 SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2503 for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2504 MachineOperand &MO = MI.getOperand(Idx);
2505 if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2506 continue;
2507 if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2508 FoldedReloads.insert(MO.getIndex());
2509 else
2510 ZeroCostFoldedReloads.insert(MO.getIndex());
2511 }
2512 // If a stack slot is used in a folded reload, it is not zero cost.
2513 for (unsigned Slot : FoldedReloads)
2514 ZeroCostFoldedReloads.erase(Slot);
2515 Stats.FoldedReloads += FoldedReloads.size();
2516 Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2517 continue;
2518 }
2519 Accesses.clear();
2520 if (TII->hasStoreToStackSlot(MI, Accesses) &&
2521 llvm::any_of(Accesses, isSpillSlotAccess)) {
2522 Stats.FoldedSpills += Accesses.size();
2523 }
2524 }
2525 // Weight each collected statistic by the relative frequency of this
2526 // basic block.
2527 float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2528 Stats.ReloadsCost = RelFreq * Stats.Reloads;
2529 Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2530 Stats.SpillsCost = RelFreq * Stats.Spills;
2531 Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2532 Stats.CopiesCost = RelFreq * Stats.Copies;
2533 return Stats;
2534}
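
// [Editorial note, not part of RegAllocGreedy.cpp; hypothetical counts.]
// Worked example of the weighting above: a block with relative frequency
// 4.0 containing Reloads = 2 and Spills = 1 contributes ReloadsCost =
// 4.0 * 2 = 8.0 and SpillsCost = 4.0 * 1 = 4.0, so spill code in hot loops
// dominates the reported per-function totals.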
2535
2536RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2537 RAGreedyStats Stats;
2538
2539 // Sum up the spills and reloads in subloops.
2540 for (MachineLoop *SubLoop : *L)
2541 Stats.add(reportStats(SubLoop));
2542
2543 for (MachineBasicBlock *MBB : L->getBlocks())
2544 // Handle blocks that were not included in subloops.
2545 if (Loops->getLoopFor(MBB) == L)
2546 Stats.add(computeStats(*MBB));
2547
2548 if (!Stats.isEmpty()) {
2549 using namespace ore;
2550
2551 ORE->emit([&]() {
2552 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2553 L->getStartLoc(), L->getHeader());
2554 Stats.report(R);
2555 R << "generated in loop";
2556 return R;
2557 });
2558 }
2559 return Stats;
2560}
2561
2562void RAGreedy::reportStats() {
2563 if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2564 return;
2565 RAGreedyStats Stats;
2566 for (MachineLoop *L : *Loops)
2567 Stats.add(reportStats(L));
2568 // Process non-loop blocks.
2569 for (MachineBasicBlock &MBB : *MF)
2570 if (!Loops->getLoopFor(&MBB))
2571 Stats.add(computeStats(MBB));
2572 if (!Stats.isEmpty()) {
2573 using namespace ore;
2574
2575 ORE->emit([&]() {
2576 DebugLoc Loc;
2577 if (auto *SP = MF->getFunction().getSubprogram())
2578 Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2579 MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2580 &MF->front());
2581 Stats.report(R);
2582 R << "generated in function";
2583 return R;
2584 });
2585 }
2586}
2587
2588bool RAGreedy::hasVirtRegAlloc() {
2589 for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2590 Register Reg = Register::index2VirtReg(I);
2591 if (MRI->reg_nodbg_empty(Reg))
2592 continue;
2593 const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2594 if (!RC)
2595 continue;
2596 if (ShouldAllocateClass(*TRI, *RC))
2597 return true;
2598 }
2599
2600 return false;
2601}
2602
2603 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2604 LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2605 << "********** Function: " << mf.getName() << '\n');
2606
2607 MF = &mf;
2608 TII = MF->getSubtarget().getInstrInfo();
2609
2610 if (VerifyEnabled)
2611 MF->verify(this, "Before greedy register allocator");
2612
2613 RegAllocBase::init(getAnalysis<VirtRegMap>(),
2614 getAnalysis<LiveIntervals>(),
2615 getAnalysis<LiveRegMatrix>());
2616
2617 // Early return if there is no virtual register to be allocated to a
2618 // physical register.
2619 if (!hasVirtRegAlloc())
2620 return false;
2621
2622 Indexes = &getAnalysis<SlotIndexes>();
2623 MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2624 DomTree = &getAnalysis<MachineDominatorTree>();
2625 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2626 Loops = &getAnalysis<MachineLoopInfo>();
2627 Bundles = &getAnalysis<EdgeBundles>();
2628 SpillPlacer = &getAnalysis<SpillPlacement>();
2629 DebugVars = &getAnalysis<LiveDebugVariables>();
2630
2631 initializeCSRCost();
2632
2633 RegCosts = TRI->getRegisterCosts(*MF);
2634 RegClassPriorityTrumpsGlobalness =
2635 GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2636 ? GreedyRegClassPriorityTrumpsGlobalness
2637 : TRI->regClassPriorityTrumpsGlobalness(*MF);
2638
2639 ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2640 ? GreedyReverseLocalAssignment
2641 : TRI->reverseLocalAssignment();
2642
2643 ExtraInfo.emplace();
2644 EvictAdvisor =
2645 getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2646 PriorityAdvisor =
2647 getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2648
2649 VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2650 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2651
2652 VRAI->calculateSpillWeightsAndHints();
2653
2654 LLVM_DEBUG(LIS->dump());
2655
2656 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2657 SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2658
2659 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2660 GlobalCand.resize(32); // This will grow as needed.
2661 SetOfBrokenHints.clear();
2662
2663 allocatePhysRegs();
2664 tryHintsRecoloring();
2665
2666 if (VerifyEnabled)
2667 MF->verify(this, "Before post optimization");
2668 postOptimization();
2669 reportStats();
2670
2671 releaseMemory();
2672 return true;
2673}
unsigned SubReg
MachineBasicBlock & MBB
SmallPtrSet< MachineInstr *, 2 > Uses
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:678
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
#define DEBUG_TYPE
const HexagonInstrInfo * TII
Hexagon Hardware Loops
IRTranslator LLVM IR MI
This file implements an indexed map.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static cl::opt< bool > GreedyRegClassPriorityTrumpsGlobalness("greedy-regclass-priority-trumps-globalness", cl::desc("Change the greedy register allocator's live range priority " "calculation to make the AllocationPriority of the register class " "more important then whether the range is global"), cl::Hidden)
static cl::opt< bool > ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring"), cl::Hidden)
const float Hysteresis
static cl::opt< unsigned > LastChanceRecoloringMaxInterference("lcr-max-interf", cl::Hidden, cl::desc("Last chance recoloring maximum number of considered" " interference at a time"), cl::init(8))
static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg)
Return true if reg has any tied def operand.
static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI, const VirtRegMap &VRM, MCRegister PhysReg, const LiveInterval &Intf)
Return true if the existing assignment of Intf overlaps, but is not the same, as PhysReg.
static cl::opt< unsigned > CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden)
static cl::opt< unsigned > LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, cl::desc("Last chance recoloring max depth"), cl::init(5))
static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator)
static cl::opt< unsigned long > GrowRegionComplexityBudget("grow-region-complexity-budget", cl::desc("growRegion() does not scale with the number of BB edges, so " "limit its budget and bail out once we reach the limit."), cl::init(10000), cl::Hidden)
Greedy Register Allocator
static cl::opt< SplitEditor::ComplementSpillMode > SplitSpillMode("split-spill-mode", cl::Hidden, cl::desc("Spill mode for splitting live ranges"), cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")), cl::init(SplitEditor::SM_Speed))
static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const MachineInstr &MI, Register Reg)
static bool readsLaneSubset(const MachineRegisterInfo &MRI, const MachineInstr *MI, const LiveInterval &VirtReg, const TargetRegisterInfo *TRI, SlotIndex Use)
Return true if MI at \P Use reads a subset of the lanes live in VirtReg.
static unsigned getNumAllocatableRegsForConstraints(const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const RegisterClassInfo &RCI)
Get the number of allocatable registers that match the constraints of Reg on MI and that are also in ...
static cl::opt< bool > EnableDeferredSpilling("enable-deferred-spilling", cl::Hidden, cl::desc("Instead of spilling a variable right away, defer the actual " "code insertion to the end of the allocation. That way the " "allocator might still find a suitable coloring for this " "variable because of other evicted variables."), cl::init(false))
static cl::opt< bool > GreedyReverseLocalAssignment("greedy-reverse-local-assignment", cl::desc("Reverse allocation order of local live ranges, such that " "shorter local live ranges will tend to be allocated first"), cl::Hidden)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Lower i1 Copies
SI optimize exec mask operations pre RA
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool isHint(Register Reg) const
Return true if Reg is a preferred physical register.
ArrayRef< MCPhysReg > getOrder() const
Get the allocation order without reordered hints.
static AllocationOrder create(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo, const LiveRegMatrix *Matrix)
Create a new AllocationOrder for VirtReg.
Iterator end() const
Iterator begin() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:152
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
iterator begin() const
Definition: ArrayRef.h:151
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
bool test(unsigned Idx) const
Definition: BitVector.h:454
BitVector & reset()
Definition: BitVector.h:385
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
static uint64_t getMaxFrequency()
Returns the maximum possible frequency, the saturation value.
A debug info location.
Definition: DebugLoc.h:33
ArrayRef< unsigned > getBlocks(unsigned Bundle) const
getBlocks - Return an array of blocks that are connected to Bundle.
Definition: EdgeBundles.h:47
unsigned getBundle(unsigned N, bool Out) const
getBundle - Return the ingoing (Out = false) or outgoing (Out = true) bundle number for basic block N
Definition: EdgeBundles.h:41
unsigned getNumBundles() const
getNumBundles - Return the total number of bundles in the CFG.
Definition: EdgeBundles.h:44
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
Cursor - The primary query interface for the block interference cache.
SlotIndex first()
first - Return the starting index of the first interfering range in the current block.
SlotIndex last()
last - Return the ending index of the last interfering range in the current block.
bool hasInterference()
hasInterference - Return true if the current block has any interference.
void moveToBlock(unsigned MBBNum)
moveTo - Move cursor to basic block MBBNum.
void init(MachineFunction *mf, LiveIntervalUnion *liuarray, SlotIndexes *indexes, LiveIntervals *lis, const TargetRegisterInfo *tri)
init - Prepare cache for a new function.
unsigned getMaxCursors() const
getMaxCursors - Return the maximum number of concurrent cursors that can be supported.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
void splitRegister(Register OldReg, ArrayRef< Register > NewRegs, LiveIntervals &LIS)
splitRegister - Move any user variables in OldReg to the live ranges in NewRegs where they are live.
Query interferences between a single live virtual register and a live interval union.
const SmallVectorImpl< const LiveInterval * > & interferingVRegs(unsigned MaxInterferingRegs=std::numeric_limits< unsigned >::max())
SegmentIter find(SlotIndex x)
LiveSegments::iterator SegmentIter
A live range for subregisters.
Definition: LiveInterval.h:693
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
Register reg() const
Definition: LiveInterval.h:717
bool isSpillable() const
isSpillable - Can this interval be spilled?
Definition: LiveInterval.h:819
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
unsigned getSize() const
getSize - Returns the sum of sizes of all the LiveRange's.
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
ArrayRef< SlotIndex > getRegMaskSlotsInBlock(unsigned MBBNum) const
Returns a sorted array of slot indices of all instructions with register mask operands in the basic b...
LiveInterval & getInterval(Register Reg)
MachineBasicBlock * intervalIsInOneMBB(const LiveInterval &LI) const
If LI is confined to a single basic block, return a pointer to that block.
bool empty() const
unsigned size() const
Register get(unsigned idx) const
ArrayRef< Register > regs() const
iterator end() const
iterator begin() const
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
bool empty() const
Definition: LiveInterval.h:382
iterator end()
Definition: LiveInterval.h:216
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
bool checkRegMaskInterference(const LiveInterval &VirtReg, MCRegister PhysReg=MCRegister::NoRegister)
Check for regmask interference only.
void unassign(const LiveInterval &VirtReg)
Unassign VirtReg from its PhysReg.
LiveIntervalUnion::Query & query(const LiveRange &LR, MCRegister RegUnit)
Query a line of the assigned virtual register matrix directly.
bool isPhysRegUsed(MCRegister PhysReg) const
Returns true if the given PhysReg has any live intervals assigned.
@ IK_VirtReg
Virtual register interference.
Definition: LiveRegMatrix.h:90
void assign(const LiveInterval &VirtReg, MCRegister PhysReg)
Assign VirtReg to PhysReg.
InterferenceKind checkInterference(const LiveInterval &VirtReg, MCRegister PhysReg)
Check for interference before assigning VirtReg to PhysReg.
LiveIntervalUnion * getLiveUnions()
Directly access the live interval unions per regunit.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
static constexpr unsigned NoRegister
Definition: MCRegister.h:43
bool isValid() const
Definition: MCRegister.h:76
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator getFirstNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the first non-debug instruction in the basic block, or end().
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
raw_ostream & printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const
uint64_t getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool isSpillSlotObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a spill slot.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isImplicitDef() const
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
bool allowExtraAnalysis(StringRef PassName) const
Whether we allow for extra compile-time budget to perform more analysis to be more informative.
Diagnostic information for missed-optimization remarks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
iterator_range< def_iterator > def_operands(Register Reg) const
LaneBitmask getMaxLaneMaskForVReg(Register Reg) const
Returns a mask covering all bits that can appear in lane masks of subregisters of the virtual registe...
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
iterator_range< reg_instr_nodbg_iterator > reg_nodbg_instructions(Register Reg) const
void dump() const
Definition: Pass.cpp:136
void LRE_DidCloneVirtReg(Register New, Register Old)
Spiller & spiller() override
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
MCRegister selectOrSplit(const LiveInterval &, SmallVectorImpl< Register > &) override
bool runOnMachineFunction(MachineFunction &mf) override
Perform register allocation.
const LiveInterval * dequeue() override
dequeue - Return the next unassigned register, or NULL.
void enqueueImpl(const LiveInterval *LI) override
enqueue - Add VirtReg to the priority queue of unassigned registers.
RAGreedy(const RegClassFilterFunc F=allocateAllRegClasses)
static char ID
void getAnalysisUsage(AnalysisUsage &AU) const override
RAGreedy analysis usage.
void aboutToRemoveInterval(const LiveInterval &) override
Method called when the allocator is about to remove a LiveInterval.
RegAllocBase provides the register allocation driver and interface that can be extended to add intere...
Definition: RegAllocBase.h:61
void enqueue(const LiveInterval *LI)
enqueue - Add VirtReg to the priority queue of unassigned registers.
void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat)
SmallPtrSet< MachineInstr *, 32 > DeadRemats
Inst which is a def of an original reg and whose defs are already all dead after remat is saved in De...
Definition: RegAllocBase.h:77
const TargetRegisterInfo * TRI
Definition: RegAllocBase.h:65
LiveIntervals * LIS
Definition: RegAllocBase.h:68
static const char TimerGroupName[]
Definition: RegAllocBase.h:115
static const char TimerGroupDescription[]
Definition: RegAllocBase.h:116
LiveRegMatrix * Matrix
Definition: RegAllocBase.h:69
virtual void postOptimization()
VirtRegMap * VRM
Definition: RegAllocBase.h:67
RegisterClassInfo RegClassInfo
Definition: RegAllocBase.h:70
MachineRegisterInfo * MRI
Definition: RegAllocBase.h:66
static bool VerifyEnabled
VerifyEnabled - True when -verify-regalloc is given.
Definition: RegAllocBase.h:123
const RegClassFilterFunc ShouldAllocateClass
Definition: RegAllocBase.h:71
ImmutableAnalysis abstraction for fetching the Eviction Advisor.
std::optional< unsigned > getOrderLimit(const LiveInterval &VirtReg, const AllocationOrder &Order, unsigned CostPerUseLimit) const
bool isUnusedCalleeSavedReg(MCRegister PhysReg) const
Returns true if the given PhysReg is a callee saved register and has not been used for allocation yet...
bool canAllocatePhysReg(unsigned CostPerUseLimit, MCRegister PhysReg) const
Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const
unsigned getLastCostChange(const TargetRegisterClass *RC) const
Get the position of the last cost change in getOrder(RC).
bool isProperSubClass(const TargetRegisterClass *RC) const
isProperSubClass - Returns true if RC has a legal super-class with more allocatable registers.
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
uint8_t getMinCost(const TargetRegisterClass *RC) const
Get the minimum register cost in RC's allocation order.
MCRegister getLastCalleeSavedAlias(MCRegister PhysReg) const
getLastCalleeSavedAlias - Returns the last callee saved register that overlaps PhysReg,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
unsigned id() const
Definition: Register.h:111
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:120
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
Definition: SlotIndexes.h:204
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
@ InstrDist
The default distance between instructions as returned by distance().
Definition: SlotIndexes.h:133
SlotIndex getBoundaryIndex() const
Returns the boundary index for associated with this index.
Definition: SlotIndexes.h:253
SlotIndex getBaseIndex() const
Returns the base index for associated with this index.
Definition: SlotIndexes.h:246
int getApproxInstrDistance(SlotIndex other) const
Return the scaled distance from this index to the given one, where all slots on the same instruction ...
Definition: SlotIndexes.h:225
SlotIndexes pass.
Definition: SlotIndexes.h:319
SlotIndex getLastIndex()
Returns the base index of the last slot in this analysis.
Definition: SlotIndexes.h:379
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
Definition: SlotIndexes.h:471
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
Definition: SlotIndexes.h:373
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction for the given index, or null if the given index has no instruction associated with it.
Definition: SlotIndexes.h:408
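A short sketch tying the SlotIndex comparisons to the SlotIndexes lookups, assuming an analysis result Indexes and two valid indexes A and B:

// Sketch: order two program points and recover the later instruction.
void inspect(const SlotIndexes &Indexes, SlotIndex A, SlotIndex B) {
  assert(A.isValid() && B.isValid());
  if (SlotIndex::isSameInstr(A, B))
    return; // different slots of one instruction, zero distance apart
  SlotIndex Later = SlotIndex::isEarlierInstr(A, B) ? B : A;
  // getBaseIndex() snaps to the instruction's first slot, so the lookup
  // succeeds no matter which slot Later names; getBoundaryIndex() would
  // snap to the last slot instead.
  if (MachineInstr *MI = Indexes.getInstructionFromIndex(Later.getBaseIndex()))
    (void)MI;
  // Adjacent instructions are InstrDist apart in this scaled distance.
  (void)A.getApproxInstrDistance(B);
}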
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
bool erase(const T &V)
Definition: SmallSet.h:205
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:177
size_type size() const
Definition: SmallSet.h:159
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:577
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
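Both containers avoid heap allocation while their contents stay within the inline capacity; a minimal, self-contained usage sketch:

#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include <cassert>
using namespace llvm;

// Sketch: deduplicate while preserving first-seen order, all inline storage.
void dedupExample() {
  SmallSet<unsigned, 8> Seen;
  SmallVector<unsigned, 8> Order;
  for (unsigned Reg : {3u, 5u, 3u, 7u})
    if (Seen.insert(Reg).second) // insert() reports whether Reg was new
      Order.push_back(Reg);
  assert(Seen.count(3) == 1 && Order.size() == 3);
}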
void addConstraints(ArrayRef< BlockConstraint > LiveBlocks)
addConstraints - Add constraints and biases.
bool finish()
finish - Compute the optimal spill code placement given the constraints.
void addPrefSpill(ArrayRef< unsigned > Blocks, bool Strong)
addPrefSpill - Add PrefSpill constraints to all blocks listed.
void prepare(BitVector &RegBundles)
prepare - Reset state and prepare for a new spill placement computation.
bool scanActiveBundles()
scanActiveBundles - Perform an initial scan of all bundles activated by addConstraints and addLinks,...
void addLinks(ArrayRef< unsigned > Links)
addLinks - Add transparent blocks with the given numbers.
void iterate()
iterate - Update the network iteratively until convergence, or new bundles are found.
@ MustSpill
A register is impossible; the variable must be spilled.
@ DontCare
Block doesn't care / variable not live.
@ PrefReg
Block entry/exit prefers a register.
@ PrefSpill
Block entry/exit prefers a stack slot.
ArrayRef< unsigned > getRecentPositive()
getRecentPositive - Return an array of bundles that became positive during the previous call to scanActiveBundles...
BlockFrequency getBlockFrequency(unsigned Number) const
getBlockFrequency - Return the estimated block execution frequency per function invocation.
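The SpillPlacement entries above describe a fixed protocol: reset the network, feed it per-block constraints and transparent links, scan, iterate to convergence, then read the result. A condensed sketch of that call sequence, assuming SpillPlacer, LiveBlocks (BlockConstraints), and TransparentBlocks (block numbers) come from the surrounding pass:

// Sketch: one spill placement computation over the CFG bundle graph.
BitVector RegBundles;
SpillPlacer.prepare(RegBundles);         // reset; result bits land here
SpillPlacer.addConstraints(LiveBlocks);  // entry/exit biases per live block
SpillPlacer.addLinks(TransparentBlocks); // blocks the value just flows through
if (SpillPlacer.scanActiveBundles()) {
  // Bundles that went positive can pull more blocks into the region; a real
  // client consumes getRecentPositive() here before running:
  SpillPlacer.iterate();                 // update network until convergence
}
bool Useful = SpillPlacer.finish();      // optimal placement for these inputs
// A set bit in RegBundles now means that bundle prefers a register.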
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
SplitAnalysis - Analyze a LiveInterval, looking for live range splitting opportunities.
Definition: SplitKit.h:96
SplitEditor - Edit machine code and LiveIntervals for live range splitting.
Definition: SplitKit.h:257
@ SM_Partition
SM_Partition(Default) - Try to create the complement interval so it doesn't overlap any other intervals.
Definition: SplitKit.h:280
@ SM_Speed
SM_Speed - Overlap intervals to minimize the expected execution frequency of the inserted copies.
Definition: SplitKit.h:292
@ SM_Size
SM_Size - Overlap intervals to minimize the number of inserted COPY instructions.
Definition: SplitKit.h:287
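The three complement spill modes trade the number of inserted copies against their expected execution frequency. A hypothetical selection policy; only the enum values are from SplitKit, while the function and its flags are illustrative:

// Sketch: pick a complement spill mode from an optimization goal.
SplitEditor::ComplementSpillMode pickMode(bool OptForSize, bool AllowOverlap) {
  if (!AllowOverlap)
    return SplitEditor::SM_Partition;    // default: keep intervals disjoint
  return OptForSize ? SplitEditor::SM_Size   // fewest inserted COPYs
                    : SplitEditor::SM_Speed; // cheapest expected copy freq
}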
TargetInstrInfo - Interface to description of machine instruction set.
virtual unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct store to a stack slot, return the virtual or physical register...
virtual std::pair< unsigned, unsigned > getPatchpointUnfoldableRange(const MachineInstr &MI) const
For a patchpoint, stackmap, or statepoint intrinsic, return the range of operands which can't be folded...
virtual bool hasStoreToStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a store to a stack slot, return true along with the FrameIndex...
virtual bool hasLoadFromStackSlot(const MachineInstr &MI, SmallVectorImpl< const MachineMemOperand * > &Accesses) const
If the specified machine instruction has a load from a stack slot, return true along with the FrameIndex...
virtual unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const
If the specified machine instruction is a direct load from a stack slot, return the virtual or physical register...
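These TargetInstrInfo hooks are how the allocator recognizes spill code. A minimal sketch that classifies an instruction against one known spill slot; the helper and its enum are illustrative:

// Sketch: is MI a reload from, or a spill to, the slot SpillFI?
enum class SpillKind { None, Reload, Spill };
SpillKind classify(const TargetInstrInfo &TII, const MachineInstr &MI,
                   int SpillFI) {
  int FI = 0;
  if (TII.isLoadFromStackSlot(MI, FI) && FI == SpillFI)
    return SpillKind::Reload;  // direct load from our slot
  if (TII.isStoreToStackSlot(MI, FI) && FI == SpillFI)
    return SpillKind::Spill;   // direct store to our slot
  // hasLoadFromStackSlot/hasStoreToStackSlot would also catch folded
  // accesses through their MachineMemOperand lists, at coarser granularity.
  return SpillKind::None;
}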
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const uint8_t AllocationPriority
Classes with a higher priority value are assigned first by register allocators using a greedy heuristic.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool shouldUseDeferredSpillingForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Deferred spilling delays the spill insertion of a virtual register until after every other allocation.
virtual bool shouldRegionSplitForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Region split has a high compile time cost, especially for large live ranges.
virtual bool shouldUseLastChanceRecoloringForVirtReg(const MachineFunction &MF, const LiveInterval &VirtReg) const
Last chance recoloring has a high compile time cost, especially for targets with many registers.
virtual unsigned getCSRFirstUseCost() const
Allow the target to override the cost of using a callee-saved register for the first time.
LaneBitmask getCoveringLanes() const
The lane masks returned by getSubRegIndexLaneMask() above can only be used to determine if sub-registers...
ArrayRef< uint8_t > getRegisterCosts(const MachineFunction &MF) const
Get a list of cost values for all registers that correspond to the index returned by RegisterCostTableIndex.
virtual bool reverseLocalAssignment() const
Allow the target to reverse allocation order of local live ranges.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
virtual bool regClassPriorityTrumpsGlobalness(const MachineFunction &MF) const
When prioritizing live ranges in register allocation, if this hook returns true then the AllocationPriority...
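A small sketch combining the alias and sub-register queries above, assuming TRI and the arguments come from surrounding allocator code; the policy is illustrative:

// Sketch: would assigning PhysReg (through SubIdx, if nonzero) touch
// a register we must keep live?
bool wouldClobber(const TargetRegisterInfo &TRI, MCRegister PhysReg,
                  unsigned SubIdx, MCRegister MustPreserve) {
  MCRegister Unit = SubIdx ? TRI.getSubReg(PhysReg, SubIdx) : PhysReg;
  if (!Unit)
    return true; // PhysReg has no such sub-register: be conservative
  // regsOverlap() is alias-aware: equality or any shared alias counts.
  return TRI.regsOverlap(Unit, MustPreserve);
}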
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
bool hasKnownPreference(Register VirtReg) const
Returns true if VirtReg has a known preferred register.
Definition: VirtRegMap.cpp:118
MCRegister getPhys(Register virtReg) const
Returns the physical register mapped to the specified virtual register.
Definition: VirtRegMap.h:105
bool hasPhys(Register virtReg) const
Returns true if the specified virtual register is mapped to a physical register.
Definition: VirtRegMap.h:99
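A minimal sketch of querying the current virtual-to-physical mapping, assuming a populated VirtRegMap VRM (printReg and dbgs appear further down this index):

// Sketch: report where a virtual register lives, if it is mapped at all.
void report(const VirtRegMap &VRM, Register VirtReg,
            const TargetRegisterInfo *TRI) {
  if (VRM.hasPhys(VirtReg))
    dbgs() << printReg(VirtReg, TRI) << " -> "
           << printReg(VRM.getPhys(VirtReg), TRI) << '\n';
  else if (VRM.hasKnownPreference(VirtReg))
    dbgs() << printReg(VirtReg, TRI) << " has a preferred register\n";
}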
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to the ValuesClass constructor.
Definition: CommandLine.h:703
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createGreedyRegisterAllocator()
Greedy register allocation pass - This pass implements a global register allocator for optimized builds.
bool TimePassesIsEnabled
If the user specifies the -time-passes argument on an LLVM tool command line then the value of this boolean will be true; otherwise false.
char & RAGreedyID
Greedy register allocator.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring through VirtRegMap.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1789
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:495
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ RS_Split2
Attempt more aggressive live range splitting that is guaranteed to make progress.
@ RS_Spill
Live range will be spilled. No more splitting will be attempted.
@ RS_Split
Attempt live range splitting if assignment is impossible.
@ RS_New
Newly created live range that has never been queued.
@ RS_Done
There is nothing more we can do to this live range.
@ RS_Assign
Only attempt assignment and eviction. Then requeue as RS_Split.
@ RS_Memory
Live range is in memory.
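Read in dependency order rather than the index order above, the stages form a one-way ladder: each failure moves a live range to a strictly more drastic stage, which bounds how often it can be requeued. A hypothetical illustration of that progression; the transition helper is not LLVM API:

// Sketch: the monotone stage ladder the enum encodes. Illustrative only.
LiveRangeStage nextStage(LiveRangeStage S) {
  switch (S) {
  case RS_New:    return RS_Assign; // try plain assignment/eviction first
  case RS_Assign: return RS_Split;  // split once assignment is impossible
  case RS_Split:  return RS_Split2; // then splitting that must make progress
  case RS_Split2: return RS_Spill;  // then give up and spill
  default:        return RS_Done;   // RS_Spill/RS_Memory/RS_Done are terminal
  }
}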
const float huge_valf
Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
Definition: MathExtras.cpp:28
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:2001
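The range wrappers keep allocator loops terse; a small sketch over a sorted SmallVector, with illustrative data:

// Sketch: STLExtras range helpers over a sorted container.
void rangeHelpers() {
  SmallVector<unsigned, 8> Uses = {2, 4, 8, 16};
  bool HasBig = any_of(Uses, [](unsigned U) { return U > 10; });
  auto It = lower_bound(Uses, 5u); // first element not less than 5 -> 8
  for (unsigned U : reverse(Uses)) // iterate back to front
    (void)U;
  assert(HasBig && It != Uses.end() && *It == 8);
}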
static float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr)
Normalize the spill weight of a live interval.
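The normalization divides a frequency-weighted use/def count by a measure of the range's extent, so short ranges rank roughly by use count while long ranges rank by use density. A hedged sketch of that shape, assuming <algorithm> and SlotIndexes.h; the floor term is an assumption, not necessarily LLVM's exact expression:

// Sketch of the normalization's shape. The floor keeps tiny intervals from
// being dominated by accidental SlotIndex gaps; the exact floor is assumed.
float normalizeSpillWeightSketch(float UseDefFreq, unsigned Size,
                                 unsigned NumInstr) {
  return UseDefFreq / std::max(Size, NumInstr * SlotIndex::InstrDist);
}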
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
std::function< bool(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC)> RegClassFilterFunc
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
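These printers compose with dbgs() for allocator tracing; a one-line sketch, assuming Reg, TRI, and a MachineBasicBlock MBB are in scope and DEBUG_TYPE is defined as usual:

// Sketch: a typical debug trace for an assignment decision.
LLVM_DEBUG(dbgs() << "assigning " << printReg(Reg, TRI) << " in "
                  << printMBBReference(MBB) << '\n');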
uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition: MathExtras.h:229
constexpr bool any() const
Definition: LaneBitmask.h:53
This class is basically a combination of TimeRegion and Timer.
Definition: Timer.h:165
BlockConstraint - Entry and exit constraints for a basic block.
BorderConstraint Exit
Constraint on block exit.
bool ChangesValue
True when this block changes the value of the live range.
BorderConstraint Entry
Constraint on block entry.
unsigned Number
Basic block number (from MBB::getNumber()).
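Filling in one BlockConstraint per live block is how a client encodes its biases before calling addConstraints; a minimal sketch for a block that wants the value in a register on entry but spilled on exit (MBB and SpillPlacer come from context):

// Sketch: one per-block constraint record for SpillPlacement.
SpillPlacement::BlockConstraint BC;
BC.Number = MBB->getNumber();        // basic block number
BC.Entry = SpillPlacement::PrefReg;  // reads happen early: prefer a register
BC.Exit = SpillPlacement::PrefSpill; // cold exit: prefer a stack slot
BC.ChangesValue = false;             // no def of the live range in this block
SpillPlacer.addConstraints(BC);      // single element converts to ArrayRef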
Additional information about basic blocks where the current variable is live.
Definition: SplitKit.h:121
SlotIndex FirstDef
First non-phi valno->def, or SlotIndex().
Definition: SplitKit.h:125
bool LiveOut
Current reg is live out.
Definition: SplitKit.h:127
bool LiveIn
Current reg is live in.
Definition: SplitKit.h:126
MachineBasicBlock * MBB
Definition: SplitKit.h:122
SlotIndex LastInstr
Last instr accessing current reg.
Definition: SplitKit.h:124
SlotIndex FirstInstr
First instr accessing current reg.
Definition: SplitKit.h:123
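These fields are enough to classify how the live range touches each block; a small sketch, with illustrative category names:

// Sketch: classify a block's relationship to the current live range.
const char *classify(const SplitAnalysis::BlockInfo &BI) {
  if (BI.LiveIn && BI.LiveOut && !BI.FirstInstr.isValid())
    return "live-through, never accessed"; // candidate transparent link
  if (BI.LiveIn && BI.LiveOut)
    return "live-through, used here"; // FirstInstr..LastInstr bound the uses
  if (!BI.LiveIn && BI.FirstDef.isValid())
    return "defined here";            // the range begins at FirstDef
  return "live across entry or exit only";
}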