Go to the documentation of this file.
64 #include "llvm/Config/llvm-config.h"
85 #include <system_error>
92 #define DEBUG_TYPE "regalloc"
100 cl::desc(
"Attempt coalescing during PBQP register allocation."),
106 cl::desc(
"Dump graphs for each function/round in the compilation unit."),
121 RegAllocPBQP(
char *cPassID =
nullptr)
130 StringRef getPassName()
const override {
return "PBQP Register Allocator"; }
149 using RegSet = std::set<Register>;
153 RegSet VRegsToAlloc, EmptyIntervalVRegs;
199 for (
auto NId :
G.nodeIds()) {
202 if (SpillCost == 0.0)
205 SpillCost += MinSpillCost;
217 using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
220 using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
225 const DisjointAllowedRegsCache &
D)
const {
226 const auto *NRegs = &
G.getNodeMetadata(NId).getAllowedRegs();
227 const auto *MRegs = &
G.getNodeMetadata(MId).getAllowedRegs();
233 return D.contains(IKey(NRegs, MRegs));
235 return D.contains(IKey(MRegs, NRegs));
240 DisjointAllowedRegsCache &
D) {
241 const auto *NRegs = &
G.getNodeMetadata(NId).getAllowedRegs();
242 const auto *MRegs = &
G.getNodeMetadata(MId).getAllowedRegs();
244 assert(NRegs != MRegs &&
"AllowedRegs can not be disjoint with itself");
247 D.insert(IKey(NRegs, MRegs));
249 D.insert(IKey(MRegs, NRegs));
257 std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
259 static SlotIndex getStartPoint(
const IntervalInfo &
I) {
260 return std::get<0>(
I)->segments[std::get<1>(
I)].start;
263 static SlotIndex getEndPoint(
const IntervalInfo &
I) {
264 return std::get<0>(
I)->segments[std::get<1>(
I)].end;
268 return std::get<2>(
I);
271 static bool lowestStartPoint(
const IntervalInfo &
I1,
272 const IntervalInfo &I2) {
275 return getStartPoint(
I1) > getStartPoint(I2);
278 static bool lowestEndPoint(
const IntervalInfo &
I1,
279 const IntervalInfo &I2) {
292 return std::get<0>(
I1)->reg() < std::get<0>(I2)->reg();
295 static bool isAtLastSegment(
const IntervalInfo &
I) {
296 return std::get<1>(
I) == std::get<0>(
I)->size() - 1;
299 static IntervalInfo nextSegment(
const IntervalInfo &
I) {
300 return std::make_tuple(std::get<0>(
I), std::get<1>(
I) + 1, std::get<2>(
I));
322 DisjointAllowedRegsCache
D;
324 using IntervalSet =
std::set<IntervalInfo, decltype(&lowestEndPoint)>;
325 using IntervalQueue =
326 std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
327 decltype(&lowestStartPoint)>;
328 IntervalSet Active(lowestEndPoint);
329 IntervalQueue Inactive(lowestStartPoint);
332 for (
auto NId :
G.nodeIds()) {
333 Register VReg =
G.getNodeMetadata(NId).getVReg();
335 assert(!LI.
empty() &&
"PBQP graph contains node for empty interval");
336 Inactive.push(std::make_tuple(&LI, 0, NId));
339 while (!Inactive.empty()) {
342 IntervalInfo Cur = Inactive.top();
345 IntervalSet::iterator RetireItr = Active.begin();
346 while (RetireItr != Active.end() &&
347 (getEndPoint(*RetireItr) <= getStartPoint(Cur))) {
350 if (!isAtLastSegment(*RetireItr))
351 Inactive.push(nextSegment(*RetireItr));
355 Active.erase(Active.begin(), RetireItr);
359 Cur = Inactive.top();
365 for (
const auto &A : Active) {
370 if (haveDisjointAllowedRegs(
G, NId, MId,
D))
379 if (!createInterferenceEdge(
G, NId, MId,
C))
380 setDisjointAllowedRegs(
G, NId, MId,
D);
400 *
G.getMetadata().MF.getSubtarget().getRegisterInfo();
401 const auto &NRegs =
G.getNodeMetadata(NId).getAllowedRegs();
402 const auto &MRegs =
G.getNodeMetadata(MId).getAllowedRegs();
405 IKey K(&NRegs, &MRegs);
406 IMatrixCache::iterator
I =
C.find(K);
408 G.addEdgeBypassingCostAllocator(NId, MId,
I->second);
413 bool NodesInterfere =
false;
414 for (
unsigned I = 0;
I != NRegs.size(); ++
I) {
416 for (
unsigned J = 0; J != MRegs.size(); ++J) {
419 M[
I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
420 NodesInterfere =
true;
429 C[K] =
G.getEdgeCostsPtr(EId);
444 for (
const auto &
MBB : MF) {
445 for (
const auto &
MI :
MBB) {
447 if (!
CP.setRegisters(&
MI) ||
CP.getSrcReg() ==
CP.getDstReg())
456 if (!MF.getRegInfo().isAllocatable(DstReg))
461 const PBQPRAGraph::NodeMetadata::AllowedRegVector &
Allowed =
462 G.getNodeMetadata(NId).getAllowedRegs();
464 unsigned PRegOpt = 0;
465 while (PRegOpt <
Allowed.size() &&
Allowed[PRegOpt].id() != DstReg)
468 if (PRegOpt <
Allowed.size()) {
470 NewCosts[PRegOpt + 1] -= CBenefit;
476 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
477 &
G.getNodeMetadata(N1Id).getAllowedRegs();
478 const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
479 &
G.getNodeMetadata(N2Id).getAllowedRegs();
482 if (EId ==
G.invalidEdgeId()) {
484 Allowed2->size() + 1, 0);
485 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
488 if (
G.getEdgeNode1Id(EId) == N2Id) {
493 addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit);
502 void addVirtRegCoalesce(
504 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
505 const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
507 assert(CostMat.getRows() == Allowed1.size() + 1 &&
"Size mismatch.");
508 assert(CostMat.getCols() == Allowed2.size() + 1 &&
"Size mismatch.");
509 for (
unsigned I = 0;
I != Allowed1.size(); ++
I) {
511 for (
unsigned J = 0; J != Allowed2.size(); ++J) {
514 CostMat[
I + 1][J + 1] -= Benefit;
522 float normalize(
float UseDefFreq,
unsigned Size,
unsigned NumInstr)
override {
539 void PBQPRAConstraint::anchor() {}
541 void PBQPRAConstraintList::anchor() {}
543 void RegAllocPBQP::getAnalysisUsage(
AnalysisUsage &au)
const {
576 VRegsToAlloc.insert(
Reg);
584 for (
unsigned i = 0; CSR[
i] != 0; ++
i)
597 *
G.getMetadata().MF.getSubtarget().getRegisterInfo();
599 std::vector<Register> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end());
601 std::map<Register, std::vector<MCRegister>> VRegAllowedMap;
603 while (!Worklist.empty()) {
611 if (VRegLI.
empty()) {
612 EmptyIntervalVRegs.insert(VRegLI.
reg());
613 VRegsToAlloc.erase(VRegLI.
reg());
624 std::vector<MCRegister> VRegAllowed;
632 if (!RegMaskOverlaps.
empty() && !RegMaskOverlaps.
test(PReg))
636 bool Interference =
false;
647 VRegAllowed.push_back(PReg);
652 if (VRegAllowed.empty()) {
654 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
659 VRegAllowedMap[VReg.
id()] =
std::move(VRegAllowed);
662 for (
auto &KV : VRegAllowedMap) {
663 auto VReg = KV.first;
667 EmptyIntervalVRegs.insert(VReg);
668 VRegsToAlloc.erase(VReg);
672 auto &VRegAllowed = KV.second;
678 for (
unsigned i = 0;
i != VRegAllowed.size(); ++
i)
680 NodeCosts[1 +
i] += 1.0;
683 G.getNodeMetadata(NId).setVReg(VReg);
684 G.getNodeMetadata(NId).setAllowedRegs(
685 G.getMetadata().getAllowedRegs(
std::move(VRegAllowed)));
686 G.getMetadata().setNodeIdForVReg(VReg, NId);
690 void RegAllocPBQP::spillVReg(
Register VReg,
694 VRegsToAlloc.erase(VReg);
696 nullptr, &DeadRemats);
697 VRegSpiller.
spill(LRE);
702 << LRE.getParent().weight() <<
", New vregs: ");
710 VRegsToAlloc.insert(LI.
reg());
716 bool RegAllocPBQP::mapPBQPToRegAlloc(
const PBQPRAGraph &
G,
726 bool AnotherRoundNeeded =
false;
733 for (
auto NId :
G.nodeIds()) {
734 Register VReg =
G.getNodeMetadata(NId).getVReg();
738 MCRegister PReg =
G.getNodeMetadata(NId).getAllowedRegs()[AllocOpt - 1];
741 assert(PReg != 0 &&
"Invalid preg selected.");
747 spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller);
748 AnotherRoundNeeded |= !NewVRegs.empty();
752 return !AnotherRoundNeeded;
761 for (
const Register &R : EmptyIntervalVRegs) {
769 for (
MCRegister CandidateReg : RawPRegOrder) {
776 "No un-reserved physical registers in this register class");
786 for (
auto DeadInst : DeadRemats) {
788 DeadInst->eraseFromParent();
796 getAnalysis<MachineBlockFrequencyInfo>();
800 PBQPVirtRegAuxInfo VRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(), MBFI);
801 VRAI.calculateSpillWeightsAndHints();
807 VirtRegAuxInfo DefaultVRAI(MF, LIS, VRM, getAnalysis<MachineLoopInfo>(),
809 std::unique_ptr<Spiller> VRegSpiller(
826 findVRegIntervalsToAlloc(MF, LIS);
830 std::string FullyQualifiedName =
831 F.getParent()->getModuleIdentifier() +
"." +
F.getName().str();
835 if (!VRegsToAlloc.empty()) {
837 std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
838 std::make_unique<PBQPRAConstraintList>();
839 ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
840 ConstraintsRoot->addConstraint(std::make_unique<Interference>());
842 ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
845 bool PBQPAllocComplete =
false;
848 while (!PBQPAllocComplete) {
853 initializeGraph(
G, VRM, *VRegSpiller);
854 ConstraintsRoot->apply(
G);
858 std::ostringstream RS;
860 std::string GraphFileName = FullyQualifiedName +
"." + RS.str() +
864 LLVM_DEBUG(
dbgs() <<
"Dumping graph for round " << Round <<
" to \""
865 << GraphFileName <<
"\"\n");
871 PBQPAllocComplete = mapPBQPToRegAlloc(
G, Solution, VRM, *VRegSpiller);
877 finalizeAlloc(MF, LIS, VRM);
878 postOptimization(*VRegSpiller, LIS);
879 VRegsToAlloc.clear();
880 EmptyIntervalVRegs.clear();
893 Register VReg =
G.getNodeMetadata(NId).getVReg();
895 OS << NId <<
" (" << RegClassName <<
':' <<
printReg(VReg,
TRI) <<
')';
899 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
903 assert(Costs.getLength() != 0 &&
"Empty vector in graph.");
911 assert(N1Id != N2Id &&
"PBQP graphs should not have self-edges.");
913 assert(
M.getRows() != 0 &&
"No rows in matrix.");
914 assert(
M.getCols() != 0 &&
"No cols in matrix.");
915 OS <<
PrintNodeInfo(N1Id, *
this) <<
' ' <<
M.getRows() <<
" rows / ";
916 OS <<
PrintNodeInfo(N2Id, *
this) <<
' ' <<
M.getCols() <<
" cols:\n";
928 for (
auto NId : nodeIds()) {
929 OS <<
" node" << NId <<
" [ label=\""
931 << getNodeCosts(NId) <<
"\" ]\n";
934 OS <<
" edge [ len=" << nodeIds().size() <<
" ]\n";
935 for (
auto EId : edgeIds()) {
936 OS <<
" node" << getEdgeNode1Id(EId)
937 <<
" -- node" << getEdgeNode2Id(EId)
939 const Matrix &EdgeCosts = getEdgeCosts(EId);
940 for (
unsigned i = 0;
i < EdgeCosts.getRows(); ++
i) {
941 OS << EdgeCosts.getRowAsVector(
i) <<
"\\n";
949 return new RegAllocPBQP(customPassID);
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 atomic and others It is also currently not done for read modify write instructions It is also current not done if the OF or CF flags are needed The shift operators have the complication that when the shift count is EFLAGS is not set
void dump() const
Dump this graph to dbgs().
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This is an optimization pass for GlobalISel generic memory operations.
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
typename RegAllocSolverImpl ::RawVector RawVector
NodeId getEdgeNode1Id(EdgeId EId) const
Get the first node connected to this edge.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
FunctionPass * createPBQPRegisterAllocator(char *customPassID=nullptr)
Create a PBQP register allocator instance.
Abstract base for classes implementing PBQP register allocation constraints (e.g.
FunctionPass * createDefaultPBQPRegisterAllocator()
PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean Quadratic Prograaming (PBQ...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Calculate auxiliary information for a virtual register such as its spill weight and allocation hint.
typename RegAllocSolverImpl ::Matrix Matrix
Reg
All possible values of the reg field in the ModR/M byte.
const TargetRegisterInfo * getTargetRegisterInfo() const
float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const
Compute the frequency of the block, relative to the entry block.
void assignVirt2Phys(Register virtReg, MCPhysReg physReg)
creates a mapping for the specified virtual register to the specified physical register
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void initializeLiveStacksPass(PassRegistry &)
static bool isACalleeSavedRegister(MCRegister Reg, const TargetRegisterInfo &TRI, const MachineFunction &MF)
virtual float normalize(float UseDefFreq, unsigned Size, unsigned NumInstr)
Weight normalization function.
virtual ~PBQPRAConstraint()=0
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
NodeId getEdgeNode2Id(EdgeId EId) const
Get the second node connected to this edge.
typename RegAllocSolverImpl ::GraphMetadata GraphMetadata
Properties which a MachineFunction may have at a given point in time.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
void initializeSlotIndexesPass(PassRegistry &)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const TargetRegisterInfo * TRI
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Spiller * createInlineSpiller(MachineFunctionPass &Pass, MachineFunction &MF, VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
Create and return a spiller that will insert spill code directly instead of deferring though VirtRegM...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
virtual void spill(LiveRangeEdit &LRE)=0
spill - Spill the LRE.getParent() live interval.
(vector float) vec_cmpeq(*A, *B) C
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
const Vector & getNodeCosts(NodeId NId) const
Get a node's cost vector.
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF) const
Returns the preferred order for allocating registers from this register class in MF.
Represent the analysis usage information of a pass.
MachineFunctionProperties & set(Property P)
typename RegAllocSolverImpl ::Vector Vector
This class implements an extremely fast bulk output stream that can only output to a stream.
void printDot(raw_ostream &OS) const
Print a representation of this graph in DOT format.
void freezeReservedRegs(const MachineFunction &)
freezeReservedRegs - Called by the register allocator to freeze the set of reserved registers before ...
LiveInterval - This class represents the liveness of a register, or stack slot.
SlotIndex - An opaque wrapper around machine indexes.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool empty() const
empty - Tests whether there are no bits in this bitvector.
@ OF_TextWithCRLF
The file should be opened in text mode and use a carriage linefeed '\r '.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineRegisterInfo & getRegInfo() const
Implements a dense probed hash-table based set.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const Matrix & getEdgeCosts(EdgeId EId) const
Get an edge's cost matrix.
bool regsOverlap(Register RegA, Register RegB) const
Returns true if the two registers are equal or alias each other.
Represents a solution to a PBQP problem.
void initializeVirtRegMapPass(PassRegistry &)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getSelection(GraphBase::NodeId nodeId) const
Get a node's selection.
const char * getRegClassName(const TargetRegisterClass *Class) const
Returns the name of the register class.
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
static cl::opt< bool > PBQPDumpGraphs("pbqp-dump-graphs", cl::desc("Dump graphs for each function/round in the compilation unit."), cl::init(false), cl::Hidden)
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
initializer< Ty > init(const Ty &Val)
bool overlaps(const LiveRange &other) const
overlaps - Return true if the intersection of the two live ranges is not empty.
typename RegAllocSolverImpl ::RawMatrix RawMatrix
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
NodeIdSet nodeIds() const
unsigned getSpillOptionIdx()
Spill option index.
bool checkRegMaskInterference(const LiveInterval &LI, BitVector &UsableRegs)
Test if LI is live across any register mask instructions, and compute a bit mask of physical register...
LiveInterval & getInterval(Register Reg)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
void setPreservesCFG()
This function should be called by the pass, iff they do not:
StringRef - Represent a constant reference to a string, i.e.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void clearAllVirt()
clears all virtual to physical register mappings
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
A helper class for register coalescers.
static cl::opt< bool > PBQPCoalescing("pbqp-coalescing", cl::desc("Attempt coalescing during PBQP register allocation."), cl::init(false), cl::Hidden)
virtual void postOptimization()
Holds a vector of the allowed physical regs for a vreg.
static RegisterRegAlloc RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator)
EdgeIdSet edgeIds() const
A raw_ostream that writes to a file descriptor.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const
Return PBQPConstraint(s) for the target.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
Solution solve(PBQPRAGraph &G)
bool test(unsigned Idx) const
Register getSimpleHint(Register VReg) const
getSimpleHint - same as getRegAllocationHint except it will only return a target independent hint.
simple register Simple Register Coalescing
Function & getFunction()
Return the LLVM function that this machine code represents.
void initializeLiveIntervalsPass(PassRegistry &)
Simple wrapper around std::function<void(raw_ostream&)>.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Align max(MaybeAlign Lhs, Align Rhs)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addRequired()
bool reg_nodbg_empty(Register RegNo) const
reg_nodbg_empty - Return true if the only instructions using or defining Reg are Debug instructions.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, const PBQP::RegAlloc::PBQPRAGraph &G)
Create Printable object for node and register info.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
Wrapper class representing physical registers. Should be passed by value.