LLVM 23.0.0git
RegAllocPBQP.cpp
Go to the documentation of this file.
1//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
10// register allocator for LLVM. This allocator works by constructing a PBQP
11// problem representing the register allocation problem under consideration,
12// solving this using a PBQP solver, and mapping the solution back to a
13// register assignment. If any variables are selected for spilling then spill
14// code is inserted and the process repeated.
15//
16// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
17// for register allocation. For more information on PBQP for register
18// allocation, see the following papers:
19//
20// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
21// PBQP. In Proceedings of the 7th Joint Modular Languages Conference
22// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
23//
24// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
25// architectures. In Proceedings of the Joint Conference on Languages,
26// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
27// NY, USA, 139-148.
28//
29//===----------------------------------------------------------------------===//
30
32#include "RegisterCoalescer.h"
33#include "llvm/ADT/ArrayRef.h"
34#include "llvm/ADT/BitVector.h"
35#include "llvm/ADT/DenseMap.h"
36#include "llvm/ADT/DenseSet.h"
37#include "llvm/ADT/STLExtras.h"
40#include "llvm/ADT/StringRef.h"
64#include "llvm/Config/llvm-config.h"
65#include "llvm/IR/Function.h"
66#include "llvm/IR/Module.h"
67#include "llvm/Pass.h"
70#include "llvm/Support/Debug.h"
74#include <algorithm>
75#include <cassert>
76#include <cstddef>
77#include <limits>
78#include <map>
79#include <memory>
80#include <queue>
81#include <set>
82#include <sstream>
83#include <string>
84#include <system_error>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "regalloc"
92
94RegisterPBQPRepAlloc("pbqp", "PBQP register allocator",
96
97static cl::opt<bool>
98PBQPCoalescing("pbqp-coalescing",
99 cl::desc("Attempt coalescing during PBQP register allocation."),
100 cl::init(false), cl::Hidden);
101
102#ifndef NDEBUG
103static cl::opt<bool>
104PBQPDumpGraphs("pbqp-dump-graphs",
105 cl::desc("Dump graphs for each function/round in the compilation unit."),
106 cl::init(false), cl::Hidden);
107#endif
108
109namespace {
110
111///
112/// PBQP based allocators solve the register allocation problem by mapping
113/// register allocation problems to Partitioned Boolean Quadratic
114/// Programming problems.
115class RegAllocPBQP : public MachineFunctionPass {
116public:
117 static char ID;
118
119 /// Construct a PBQP register allocator.
120 RegAllocPBQP(char *cPassID = nullptr)
121 : MachineFunctionPass(ID), customPassID(cPassID) {}
122
123 /// Return the pass name.
124 StringRef getPassName() const override { return "PBQP Register Allocator"; }
125
126 /// PBQP analysis usage.
127 void getAnalysisUsage(AnalysisUsage &au) const override;
128
129 /// Perform register allocation
130 bool runOnMachineFunction(MachineFunction &MF) override;
131
132 MachineFunctionProperties getRequiredProperties() const override {
133 return MachineFunctionProperties().setNoPHIs();
134 }
135
136 MachineFunctionProperties getClearedProperties() const override {
137 return MachineFunctionProperties().setIsSSA();
138 }
139
140private:
141 using RegSet = std::set<Register>;
142
143 char *customPassID;
144
145 RegSet VRegsToAlloc, EmptyIntervalVRegs;
146
147 /// Inst which is a def of an original reg and whose defs are already all
148 /// dead after remat is saved in DeadRemats. The deletion of such inst is
149 /// postponed till all the allocations are done, so its remat expr is
150 /// always available for the remat of all the siblings of the original reg.
151 SmallPtrSet<MachineInstr *, 32> DeadRemats;
152
153 /// Finds the initial set of vreg intervals to allocate.
154 void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
155
156 /// Constructs an initial graph.
157 void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
158
159 /// Spill the given VReg.
160 void spillVReg(Register VReg, SmallVectorImpl<Register> &NewIntervals,
161 MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
162 Spiller &VRegSpiller);
163
164 /// Given a solved PBQP problem maps this solution back to a register
165 /// assignment.
166 bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
167 const PBQP::Solution &Solution,
168 VirtRegMap &VRM,
169 Spiller &VRegSpiller);
170
171 /// Postprocessing before final spilling. Sets basic block "live in"
172 /// variables.
173 void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
174 VirtRegMap &VRM) const;
175
176 void postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS);
177};
178
179char RegAllocPBQP::ID = 0;
180
181/// Set spill costs for each node in the PBQP reg-alloc graph.
182class SpillCosts : public PBQPRAConstraint {
183public:
184 void apply(PBQPRAGraph &G) override {
185 LiveIntervals &LIS = G.getMetadata().LIS;
186
187 // A minimum spill costs, so that register constraints can be set
188 // without normalization in the [0.0:MinSpillCost( interval.
189 const PBQP::PBQPNum MinSpillCost = 10.0;
190
191 for (auto NId : G.nodeIds()) {
192 PBQP::PBQPNum SpillCost =
193 LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight();
194 if (SpillCost == 0.0)
195 SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
196 else
197 SpillCost += MinSpillCost;
198 PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
199 NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
200 G.setNodeCosts(NId, std::move(NodeCosts));
201 }
202 }
203};
204
205/// Add interference edges between overlapping vregs.
206class Interference : public PBQPRAConstraint {
207private:
208 using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
209 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
210 using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
211 using DisjointAllowedRegsCache = DenseSet<IKey>;
212 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
213 using IEdgeCache = DenseSet<IEdgeKey>;
214
215 bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
217 const DisjointAllowedRegsCache &D) const {
218 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
219 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
220
221 if (NRegs == MRegs)
222 return false;
223
224 if (NRegs < MRegs)
225 return D.contains(IKey(NRegs, MRegs));
226
227 return D.contains(IKey(MRegs, NRegs));
228 }
229
230 void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
232 DisjointAllowedRegsCache &D) {
233 const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs();
234 const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs();
235
236 assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself");
237
238 if (NRegs < MRegs)
239 D.insert(IKey(NRegs, MRegs));
240 else
241 D.insert(IKey(MRegs, NRegs));
242 }
243
244 // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
245 // for the fast interference graph construction algorithm. The last is there
246 // to save us from looking up node ids via the VRegToNode map in the graph
247 // metadata.
248 using IntervalInfo =
249 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
250
251 static SlotIndex getStartPoint(const IntervalInfo &I) {
252 return std::get<0>(I)->segments[std::get<1>(I)].start;
253 }
254
255 static SlotIndex getEndPoint(const IntervalInfo &I) {
256 return std::get<0>(I)->segments[std::get<1>(I)].end;
257 }
258
259 static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) {
260 return std::get<2>(I);
261 }
262
263 static bool lowestStartPoint(const IntervalInfo &I1,
264 const IntervalInfo &I2) {
265 // Condition reversed because priority queue has the *highest* element at
266 // the front, rather than the lowest.
267 return getStartPoint(I1) > getStartPoint(I2);
268 }
269
270 static bool lowestEndPoint(const IntervalInfo &I1,
271 const IntervalInfo &I2) {
272 SlotIndex E1 = getEndPoint(I1);
273 SlotIndex E2 = getEndPoint(I2);
274
275 if (E1 < E2)
276 return true;
277
278 if (E1 > E2)
279 return false;
280
281 // If two intervals end at the same point, we need a way to break the tie or
282 // the set will assume they're actually equal and refuse to insert a
283 // "duplicate". Just compare the vregs - fast and guaranteed unique.
284 return std::get<0>(I1)->reg() < std::get<0>(I2)->reg();
285 }
286
287 static bool isAtLastSegment(const IntervalInfo &I) {
288 return std::get<1>(I) == std::get<0>(I)->size() - 1;
289 }
290
291 static IntervalInfo nextSegment(const IntervalInfo &I) {
292 return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I));
293 }
294
295public:
296 void apply(PBQPRAGraph &G) override {
297 // The following is loosely based on the linear scan algorithm introduced in
298 // "Linear Scan Register Allocation" by Poletto and Sarkar. This version
299 // isn't linear, because the size of the active set isn't bound by the
300 // number of registers, but rather the size of the largest clique in the
301 // graph. Still, we expect this to be better than N^2.
302 LiveIntervals &LIS = G.getMetadata().LIS;
303
304 // Interferenc matrices are incredibly regular - they're only a function of
305 // the allowed sets, so we cache them to avoid the overhead of constructing
306 // and uniquing them.
307 IMatrixCache C;
308
309 // Finding an edge is expensive in the worst case (O(max_clique(G))). So
310 // cache locally edges we have already seen.
311 IEdgeCache EC;
312
313 // Cache known disjoint allowed registers pairs
314 DisjointAllowedRegsCache D;
315
316 using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
317 using IntervalQueue =
318 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
319 decltype(&lowestStartPoint)>;
320 IntervalSet Active(lowestEndPoint);
321 IntervalQueue Inactive(lowestStartPoint);
322
323 // Start by building the inactive set.
324 for (auto NId : G.nodeIds()) {
325 Register VReg = G.getNodeMetadata(NId).getVReg();
326 LiveInterval &LI = LIS.getInterval(VReg);
327 assert(!LI.empty() && "PBQP graph contains node for empty interval");
328 Inactive.push(std::make_tuple(&LI, 0, NId));
329 }
330
331 while (!Inactive.empty()) {
332 // Tentatively grab the "next" interval - this choice may be overriden
333 // below.
334 IntervalInfo Cur = Inactive.top();
335
336 // Retire any active intervals that end before Cur starts.
337 IntervalSet::iterator RetireItr = Active.begin();
338 while (RetireItr != Active.end() &&
339 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
340 // If this interval has subsequent segments, add the next one to the
341 // inactive list.
342 if (!isAtLastSegment(*RetireItr))
343 Inactive.push(nextSegment(*RetireItr));
344
345 ++RetireItr;
346 }
347 Active.erase(Active.begin(), RetireItr);
348
349 // One of the newly retired segments may actually start before the
350 // Cur segment, so re-grab the front of the inactive list.
351 Cur = Inactive.top();
352 Inactive.pop();
353
354 // At this point we know that Cur overlaps all active intervals. Add the
355 // interference edges.
356 PBQP::GraphBase::NodeId NId = getNodeId(Cur);
357 for (const auto &A : Active) {
358 PBQP::GraphBase::NodeId MId = getNodeId(A);
359
360 // Do not add an edge when the nodes' allowed registers do not
361 // intersect: there is obviously no interference.
362 if (haveDisjointAllowedRegs(G, NId, MId, D))
363 continue;
364
365 // Check that we haven't already added this edge
366 IEdgeKey EK(std::min(NId, MId), std::max(NId, MId));
367 if (EC.count(EK))
368 continue;
369
370 // This is a new edge - add it to the graph.
371 if (!createInterferenceEdge(G, NId, MId, C))
372 setDisjointAllowedRegs(G, NId, MId, D);
373 else
374 EC.insert(EK);
375 }
376
377 // Finally, add Cur to the Active set.
378 Active.insert(Cur);
379 }
380 }
381
382private:
383 // Create an Interference edge and add it to the graph, unless it is
384 // a null matrix, meaning the nodes' allowed registers do not have any
385 // interference. This case occurs frequently between integer and floating
386 // point registers for example.
387 // return true iff both nodes interferes.
388 bool createInterferenceEdge(PBQPRAGraph &G,
390 IMatrixCache &C) {
391 const TargetRegisterInfo &TRI =
392 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
393 const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
394 const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
395
396 // Try looking the edge costs up in the IMatrixCache first.
397 IKey K(&NRegs, &MRegs);
398 IMatrixCache::iterator I = C.find(K);
399 if (I != C.end()) {
400 G.addEdgeBypassingCostAllocator(NId, MId, I->second);
401 return true;
402 }
403
404 PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
405 bool NodesInterfere = false;
406 for (unsigned I = 0; I != NRegs.size(); ++I) {
407 MCRegister PRegN = NRegs[I];
408 for (unsigned J = 0; J != MRegs.size(); ++J) {
409 MCRegister PRegM = MRegs[J];
410 if (TRI.regsOverlap(PRegN, PRegM)) {
411 M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
412 NodesInterfere = true;
413 }
414 }
415 }
416
417 if (!NodesInterfere)
418 return false;
419
420 PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
421 C[K] = G.getEdgeCostsPtr(EId);
422
423 return true;
424 }
425};
426
427class Coalescing : public PBQPRAConstraint {
428public:
429 void apply(PBQPRAGraph &G) override {
430 MachineFunction &MF = G.getMetadata().MF;
431 MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI;
432 CoalescerPair CP(*MF.getSubtarget().getRegisterInfo());
433
434 // Scan the machine function and add a coalescing cost whenever CoalescerPair
435 // gives the Ok.
436 for (const auto &MBB : MF) {
437 for (const auto &MI : MBB) {
438 // Skip not-coalescable or already coalesced copies.
439 if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg())
440 continue;
441
442 Register DstReg = CP.getDstReg();
443 Register SrcReg = CP.getSrcReg();
444
446
447 if (CP.isPhys()) {
448 if (!MF.getRegInfo().isAllocatable(DstReg))
449 continue;
450
451 PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
452
453 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
454 G.getNodeMetadata(NId).getAllowedRegs();
455
456 unsigned PRegOpt = 0;
457 while (PRegOpt < Allowed.size() && Allowed[PRegOpt].id() != DstReg)
458 ++PRegOpt;
459
460 if (PRegOpt < Allowed.size()) {
461 PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId));
462 NewCosts[PRegOpt + 1] -= CBenefit;
463 G.setNodeCosts(NId, std::move(NewCosts));
464 }
465 } else {
466 PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
467 PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
468 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
469 &G.getNodeMetadata(N1Id).getAllowedRegs();
470 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
471 &G.getNodeMetadata(N2Id).getAllowedRegs();
472
473 PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
474 if (EId == G.invalidEdgeId()) {
475 PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1,
476 Allowed2->size() + 1, 0);
477 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
478 G.addEdge(N1Id, N2Id, std::move(Costs));
479 } else {
480 if (G.getEdgeNode1Id(EId) == N2Id) {
481 std::swap(N1Id, N2Id);
482 std::swap(Allowed1, Allowed2);
483 }
484 PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId));
485 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
486 G.updateEdgeCosts(EId, std::move(Costs));
487 }
488 }
489 }
490 }
491 }
492
493private:
494 void addVirtRegCoalesce(
495 PBQPRAGraph::RawMatrix &CostMat,
496 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
497 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
498 PBQP::PBQPNum Benefit) {
499 assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
500 assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
501 for (unsigned I = 0; I != Allowed1.size(); ++I) {
502 MCRegister PReg1 = Allowed1[I];
503 for (unsigned J = 0; J != Allowed2.size(); ++J) {
504 MCRegister PReg2 = Allowed2[J];
505 if (PReg1 == PReg2)
506 CostMat[I + 1][J + 1] -= Benefit;
507 }
508 }
509 }
510};
511
512/// PBQP-specific implementation of weight normalization.
513class PBQPVirtRegAuxInfo final : public VirtRegAuxInfo {
514 float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr) override {
515 // All intervals have a spill weight that is mostly proportional to the
516 // number of uses, with uses in loops having a bigger weight.
517 return NumInstr * VirtRegAuxInfo::normalize(UseDefFreq, Size, 1);
518 }
519
520public:
521 PBQPVirtRegAuxInfo(MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
522 const MachineLoopInfo &Loops,
523 const MachineBlockFrequencyInfo &MBFI)
524 : VirtRegAuxInfo(MF, LIS, VRM, Loops, MBFI) {}
525};
526} // end anonymous namespace
527
528// Out-of-line destructor/anchor for PBQPRAConstraint.
530
531void PBQPRAConstraint::anchor() {}
532
533void PBQPRAConstraintList::anchor() {}
534
535void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
536 au.setPreservesCFG();
543 //au.addRequiredID(SplitCriticalEdgesID);
544 if (customPassID)
545 au.addRequiredID(*customPassID);
557}
558
559void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
560 LiveIntervals &LIS) {
561 const MachineRegisterInfo &MRI = MF.getRegInfo();
562
563 // Iterate over all live ranges.
564 for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
566 if (MRI.reg_nodbg_empty(Reg))
567 continue;
568 VRegsToAlloc.insert(Reg);
569 }
570}
571
573 const TargetRegisterInfo &TRI,
574 const MachineFunction &MF) {
575 const MCPhysReg *CSR = MF.getRegInfo().getCalleeSavedRegs();
576 for (unsigned i = 0; CSR[i] != 0; ++i)
577 if (TRI.regsOverlap(Reg, CSR[i]))
578 return true;
579 return false;
580}
581
582void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
583 Spiller &VRegSpiller) {
584 MachineFunction &MF = G.getMetadata().MF;
585
586 LiveIntervals &LIS = G.getMetadata().LIS;
587 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
588 const TargetRegisterInfo &TRI =
589 *G.getMetadata().MF.getSubtarget().getRegisterInfo();
590
591 std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
592
593 std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
594
595 while (!Worklist.empty()) {
596 Register VReg = Worklist.back();
597 Worklist.pop_back();
598
599 LiveInterval &VRegLI = LIS.getInterval(VReg);
600
601 // If this is an empty interval move it to the EmptyIntervalVRegs set then
602 // continue.
603 if (VRegLI.empty()) {
604 EmptyIntervalVRegs.insert(VRegLI.reg());
605 VRegsToAlloc.erase(VRegLI.reg());
606 continue;
607 }
608
609 const TargetRegisterClass *TRC = MRI.getRegClass(VReg);
610
611 // Record any overlaps with regmask operands.
612 BitVector RegMaskOverlaps;
613 LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps);
614
615 // Compute an initial allowed set for the current vreg.
616 std::vector<MCRegister> VRegAllowed;
617 ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF);
618 for (MCPhysReg R : RawPRegOrder) {
619 MCRegister PReg(R);
620 if (MRI.isReserved(PReg))
621 continue;
622
623 // vregLI crosses a regmask operand that clobbers preg.
624 if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg))
625 continue;
626
627 // vregLI overlaps fixed regunit interference.
628 bool Interference = false;
629 for (MCRegUnit Unit : TRI.regunits(PReg)) {
630 if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
631 Interference = true;
632 break;
633 }
634 }
635 if (Interference)
636 continue;
637
638 // preg is usable for this virtual register.
639 VRegAllowed.push_back(PReg);
640 }
641
642 // Check for vregs that have no allowed registers. These should be
643 // pre-spilled and the new vregs added to the worklist.
644 if (VRegAllowed.empty()) {
646 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
647 llvm::append_range(Worklist, NewVRegs);
648 continue;
649 }
650
651 VRegAllowedMap[VReg.id()] = std::move(VRegAllowed);
652 }
653
654 for (auto &KV : VRegAllowedMap) {
655 auto VReg = KV.first;
656
657 // Move empty intervals to the EmptyIntervalVReg set.
658 if (LIS.getInterval(VReg).empty()) {
659 EmptyIntervalVRegs.insert(VReg);
660 VRegsToAlloc.erase(VReg);
661 continue;
662 }
663
664 auto &VRegAllowed = KV.second;
665
666 PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
667
668 // Tweak cost of callee saved registers, as using then force spilling and
669 // restoring them. This would only happen in the prologue / epilogue though.
670 for (unsigned i = 0; i != VRegAllowed.size(); ++i)
671 if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF))
672 NodeCosts[1 + i] += 1.0;
673
674 PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
675 G.getNodeMetadata(NId).setVReg(VReg);
676 G.getNodeMetadata(NId).setAllowedRegs(
677 G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
678 G.getMetadata().setNodeIdForVReg(VReg, NId);
679 }
680}
681
682void RegAllocPBQP::spillVReg(Register VReg,
683 SmallVectorImpl<Register> &NewIntervals,
685 VirtRegMap &VRM, Spiller &VRegSpiller) {
686 VRegsToAlloc.erase(VReg);
687 LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
688 nullptr, &DeadRemats);
689 VRegSpiller.spill(LRE);
690
692 (void)TRI;
693 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> SPILLED (Cost: "
694 << LRE.getParent().weight() << ", New vregs: ");
695
696 // Copy any newly inserted live intervals into the list of regs to
697 // allocate.
698 for (const Register &R : LRE) {
699 const LiveInterval &LI = LIS.getInterval(R);
700 assert(!LI.empty() && "Empty spill range.");
701 LLVM_DEBUG(dbgs() << printReg(LI.reg(), &TRI) << " ");
702 VRegsToAlloc.insert(LI.reg());
703 }
704
705 LLVM_DEBUG(dbgs() << ")\n");
706}
707
708bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
709 const PBQP::Solution &Solution,
710 VirtRegMap &VRM,
711 Spiller &VRegSpiller) {
712 MachineFunction &MF = G.getMetadata().MF;
713 LiveIntervals &LIS = G.getMetadata().LIS;
715 (void)TRI;
716
717 // Set to true if we have any spills
718 bool AnotherRoundNeeded = false;
719
720 // Clear the existing allocation.
721 VRM.clearAllVirt();
722
723 // Iterate over the nodes mapping the PBQP solution to a register
724 // assignment.
725 for (auto NId : G.nodeIds()) {
726 Register VReg = G.getNodeMetadata(NId).getVReg();
727 unsigned AllocOpt = Solution.getSelection(NId);
728
729 if (AllocOpt != PBQP::RegAlloc::getSpillOptionIdx()) {
730 MCRegister PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
731 LLVM_DEBUG(dbgs() << "VREG " << printReg(VReg, &TRI) << " -> "
732 << TRI.getName(PReg) << "\n");
733 assert(PReg != 0 && "Invalid preg selected.");
734 VRM.assignVirt2Phys(VReg, PReg);
735 } else {
736 // Spill VReg. If this introduces new intervals we'll need another round
737 // of allocation.
739 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
740 AnotherRoundNeeded |= !NewVRegs.empty();
741 }
742 }
743
744 return !AnotherRoundNeeded;
745}
746
747void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
748 LiveIntervals &LIS,
749 VirtRegMap &VRM) const {
751
752 // First allocate registers for the empty intervals.
753 for (const Register &R : EmptyIntervalVRegs) {
754 LiveInterval &LI = LIS.getInterval(R);
755
756 Register PReg = MRI.getSimpleHint(LI.reg());
757
758 if (PReg == 0) {
759 const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg());
760 const ArrayRef<MCPhysReg> RawPRegOrder = RC.getRawAllocationOrder(MF);
761 for (MCRegister CandidateReg : RawPRegOrder) {
762 if (!VRM.getRegInfo().isReserved(CandidateReg)) {
763 PReg = CandidateReg;
764 break;
765 }
766 }
767 assert(PReg &&
768 "No un-reserved physical registers in this register class");
769 }
770
771 VRM.assignVirt2Phys(LI.reg(), PReg);
772 }
773}
774
775void RegAllocPBQP::postOptimization(Spiller &VRegSpiller, LiveIntervals &LIS) {
776 VRegSpiller.postOptimization();
777 /// Remove dead defs because of rematerialization.
778 for (auto *DeadInst : DeadRemats) {
779 LIS.RemoveMachineInstrFromMaps(*DeadInst);
780 DeadInst->eraseFromParent();
781 }
782 DeadRemats.clear();
783}
784
785bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
786 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
788 getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
789
790 auto &LiveStks = getAnalysis<LiveStacksWrapperLegacy>().getLS();
791 auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
792
793 VirtRegMap &VRM = getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
794
795 PBQPVirtRegAuxInfo VRAI(
796 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
797 VRAI.calculateSpillWeightsAndHints();
798
799 // FIXME: we create DefaultVRAI here to match existing behavior pre-passing
800 // the VRAI through the spiller to the live range editor. However, it probably
801 // makes more sense to pass the PBQP VRAI. The existing behavior had
802 // LiveRangeEdit make its own VirtRegAuxInfo object.
803 VirtRegAuxInfo DefaultVRAI(
804 MF, LIS, VRM, getAnalysis<MachineLoopInfoWrapperPass>().getLI(), MBFI);
805 std::unique_ptr<Spiller> VRegSpiller(
806 createInlineSpiller({LIS, LiveStks, MDT, MBFI}, MF, VRM, DefaultVRAI));
807
809
810 LLVM_DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n");
811
812 // Allocator main loop:
813 //
814 // * Map current regalloc problem to a PBQP problem
815 // * Solve the PBQP problem
816 // * Map the solution back to a register allocation
817 // * Spill if necessary
818 //
819 // This process is continued till no more spills are generated.
820
821 // Find the vreg intervals in need of allocation.
822 findVRegIntervalsToAlloc(MF, LIS);
823
824#ifndef NDEBUG
825 const Function &F = MF.getFunction();
826 std::string FullyQualifiedName =
827 F.getParent()->getModuleIdentifier() + "." + F.getName().str();
828#endif
829
830 // If there are non-empty intervals allocate them using pbqp.
831 if (!VRegsToAlloc.empty()) {
832 const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
833 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
834 std::make_unique<PBQPRAConstraintList>();
835 ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
836 ConstraintsRoot->addConstraint(std::make_unique<Interference>());
837 if (PBQPCoalescing)
838 ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
839 ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
840
841 bool PBQPAllocComplete = false;
842 unsigned Round = 0;
843
844 while (!PBQPAllocComplete) {
845 LLVM_DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n");
846 (void) Round;
847
849 initializeGraph(G, VRM, *VRegSpiller);
850 ConstraintsRoot->apply(G);
851
852#ifndef NDEBUG
853 if (PBQPDumpGraphs) {
854 std::ostringstream RS;
855 RS << Round;
856 std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
857 ".pbqpgraph";
858 std::error_code EC;
859 raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_TextWithCRLF);
860 LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
861 << GraphFileName << "\"\n");
862 G.dump(OS);
863 }
864#endif
865
867 PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller);
868 ++Round;
869 }
870 }
871
872 // Finalise allocation, allocate empty ranges.
873 finalizeAlloc(MF, LIS, VRM);
874 postOptimization(*VRegSpiller, LIS);
875 VRegsToAlloc.clear();
876 EmptyIntervalVRegs.clear();
877
878 LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n");
879
880 return true;
881}
882
883/// Create Printable object for node and register info.
886 return Printable([NId, &G](raw_ostream &OS) {
887 const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo();
888 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
889 Register VReg = G.getNodeMetadata(NId).getVReg();
890 const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg));
891 OS << NId << " (" << RegClassName << ':' << printReg(VReg, TRI) << ')';
892 });
893}
894
895#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
897 for (auto NId : nodeIds()) {
898 const Vector &Costs = getNodeCosts(NId);
899 assert(Costs.getLength() != 0 && "Empty vector in graph.");
900 OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n';
901 }
902 OS << '\n';
903
904 for (auto EId : edgeIds()) {
905 NodeId N1Id = getEdgeNode1Id(EId);
906 NodeId N2Id = getEdgeNode2Id(EId);
907 assert(N1Id != N2Id && "PBQP graphs should not have self-edges.");
908 const Matrix &M = getEdgeCosts(EId);
909 assert(M.getRows() != 0 && "No rows in matrix.");
910 assert(M.getCols() != 0 && "No cols in matrix.");
911 OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / ";
912 OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n";
913 OS << M << '\n';
914 }
915}
916
920#endif
921
923 OS << "graph {\n";
924 for (auto NId : nodeIds()) {
925 OS << " node" << NId << " [ label=\""
926 << PrintNodeInfo(NId, *this) << "\\n"
927 << getNodeCosts(NId) << "\" ]\n";
928 }
929
930 OS << " edge [ len=" << nodeIds().size() << " ]\n";
931 for (auto EId : edgeIds()) {
932 OS << " node" << getEdgeNode1Id(EId)
933 << " -- node" << getEdgeNode2Id(EId)
934 << " [ label=\"";
935 const Matrix &EdgeCosts = getEdgeCosts(EId);
936 for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) {
937 OS << EdgeCosts.getRowAsVector(i) << "\\n";
938 }
939 OS << "\" ]\n";
940 }
941 OS << "}\n";
942}
943
945 return new RegAllocPBQP(customPassID);
946}
947
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
Hexagon Hardware Loops
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const PBQP::RegAlloc::PBQPRAGraph &G)
Create Printable object for node and register info.
static cl::opt< bool > PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden)
static cl::opt< bool > PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden)
static bool isACalleeSavedRegister(MCRegister Reg, const TargetRegisterInfo &TRI, const MachineFunction &MF)
static RegisterRegAlloc RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
Definition Pass.cpp:284
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool test(unsigned Idx) const
Definition BitVector.h:480
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition BitVector.h:175
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Module * getParent()
Get the module that this global value is contained inside of...
LiveInterval - This class represents the liveness of a register, or stack slot.
float weight() const
Register reg() const
LLVM_ABI bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
void RemoveMachineInstrFromMaps(MachineInstr &MI)
LiveInterval & getInterval(Register Reg)
LiveRange & getRegUnit(MCRegUnit Unit)
Return the live range for register unit Unit.
bool empty() const
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
double getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
Analysis pass which computes a MachineDominatorTree.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI void freezeReservedRegs()
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
const std::string & getModuleIdentifier() const
Get the module identifier which is, essentially, the name of the module.
Definition Module.h:252
Abstract base for classes implementing PBQP register allocation constraints (e.g.
virtual ~PBQPRAConstraint()=0
unsigned EdgeId
Definition Graph.h:29
unsigned NodeId
Definition Graph.h:28
typename RegAllocSolverImpl::GraphMetadata GraphMetadata
Definition Graph.h:59
typename RegAllocSolverImpl::Matrix Matrix
Definition Graph.h:54
NodeId getEdgeNode2Id(EdgeId EId) const
Definition Graph.h:552
NodeId getEdgeNode1Id(EdgeId EId) const
Definition Graph.h:545
const Matrix & getEdgeCosts(EdgeId EId) const
Definition Graph.h:530
typename RegAllocSolverImpl::Vector Vector
Definition Graph.h:53
typename RegAllocSolverImpl::RawMatrix RawMatrix
Definition Graph.h:52
typename RegAllocSolverImpl::RawVector RawVector
Definition Graph.h:51
const Vector & getNodeCosts(NodeId NId) const
Definition Graph.h:488
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
void dump() const
Dump this graph to dbgs().
Represents a solution to a PBQP problem.
Definition Solution.h:26
unsigned getSelection(GraphBase::NodeId nodeId) const
Get a node's selection.
Definition Solution.h:45
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Wrapper class representing virtual and physical registers.
Definition Register.h:20
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition Register.h:72
constexpr unsigned id() const
Definition Register.h:100
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Spiller interface.
Definition Spiller.h:33
virtual void postOptimization()
Definition Spiller.h:49
virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order=nullptr)=0
spill - Spill the LRE.getParent() live interval.
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF, bool Rev=false) const
Returns the preferred order for allocating registers from this register class in MF.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const
Return PBQPConstraint(s) for the target.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Calculate auxiliary information for a virtual register such as its spill weight and allocation hint.
virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr)
Weight normalization function.
void clearAllVirt()
clears all virtual to physical register mappings
Definition VirtRegMap.h:125
MachineRegisterInfo & getRegInfo() const
Definition VirtRegMap.h:80
LLVM_ABI void assignVirt2Phys(Register virtReg, MCRegister physReg)
creates a mapping for the specified virtual register to the specified physical register
A raw_ostream that writes to a file descriptor.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
Solution solve(PBQPRAGraph &G)
unsigned getSpillOptionIdx()
Spill option index.
float PBQPNum
Definition Math.h:25
void apply(Opt *O, const Mod &M, const Mods &... Ms)
initializer< Ty > init(const Ty &Val)
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
Definition FileSystem.h:764
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI FunctionPass * createDefaultPBQPRegisterAllocator()
PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean Quadratic Prograaming (PBQ...
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
Spiller * createInlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix=nullptr)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872