38 #include "llvm/Config/llvm-config.h"
62 #define DEBUG_TYPE "pre-RA-sched"
64 STATISTIC(NumBacktracks,
"Number of times scheduler backtracked");
65 STATISTIC(NumUnfolds,
"Number of nodes unfolded");
66 STATISTIC(NumDups,
"Number of duplicated nodes");
67 STATISTIC(NumPRCopies,
"Number of physical register copies");
71 "Bottom-up register reduction list scheduling",
76 "Similar to list-burr but schedules in source "
77 "order when possible",
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
94 cl::desc(
"Disable cycle-level precision during preRA scheduling"));
100 cl::desc(
"Disable regpressure priority in sched=list-ilp"));
103 cl::desc(
"Disable live use priority in sched=list-ilp"));
106 cl::desc(
"Disable virtual register cycle interference checks"));
109 cl::desc(
"Disable physreg def-use affinity"));
112 cl::desc(
"Disable no-stall priority in sched=list-ilp"));
115 cl::desc(
"Disable critical path priority in sched=list-ilp"));
118 cl::desc(
"Disable scheduled-height priority in sched=list-ilp"));
121 cl::desc(
"Disable scheduler's two-address hack"));
125 cl::desc(
"Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
130 cl::desc(
"Average inst/cycle whan no target itinerary exists."));
150 std::vector<SUnit *> PendingQueue;
156 unsigned CurCycle = 0;
159 unsigned MinAvailableCycle;
168 unsigned NumLiveRegs;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
193 NeedLatency(needlatency), AvailableQueue(availqueue),
194 Topo(SUnits, nullptr) {
202 ~ScheduleDAGRRList()
override {
204 delete AvailableQueue;
207 void Schedule()
override;
212 bool IsReachable(
const SUnit *SU,
const SUnit *TargetSU) {
218 bool WillCreateCycle(
SUnit *SU,
SUnit *TargetSU) {
225 void AddPredQueued(
SUnit *SU,
const SDep &
D) {
247 bool isReady(
SUnit *SU) {
252 void ReleasePred(
SUnit *SU,
const SDep *PredEdge);
253 void ReleasePredecessors(
SUnit *SU);
254 void ReleasePending();
255 void AdvanceToCycle(
unsigned NextCycle);
256 void AdvancePastStalls(
SUnit *SU);
257 void EmitNode(
SUnit *SU);
258 void ScheduleNodeBottomUp(
SUnit*);
259 void CapturePred(
SDep *PredEdge);
260 void UnscheduleNodeBottomUp(
SUnit*);
261 void RestoreHazardCheckerBottomUp();
265 void InsertCopiesAndMoveSuccs(
SUnit*,
unsigned,
271 void releaseInterferences(
unsigned Reg = 0);
273 SUnit *PickNodeToScheduleBottomUp();
274 void ListScheduleBottomUp();
278 unsigned NumSUnits = SUnits.size();
279 SUnit *NewNode = newSUnit(
N);
281 if (NewNode->
NodeNum >= NumSUnits)
288 unsigned NumSUnits = SUnits.size();
289 SUnit *NewNode = Clone(
N);
291 if (NewNode->
NodeNum >= NumSUnits)
298 bool forceUnitLatencies()
const override {
313 unsigned &RegClass,
unsigned &Cost,
324 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
326 RegClass = RC->
getID();
331 unsigned Opcode = Node->getMachineOpcode();
332 if (Opcode == TargetOpcode::REG_SEQUENCE) {
333 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
335 RegClass = RC->
getID();
340 unsigned Idx = RegDefPos.
GetIdx();
343 RegClass = RC->
getID();
354 void ScheduleDAGRRList::Schedule() {
356 <<
" '" <<
BB->getName() <<
"' **********\n");
367 CallSeqEndForStart.
clear();
368 assert(Interferences.empty() && LRegsMap.empty() &&
"stale Interferences");
371 BuildSchedGraph(
nullptr);
381 ListScheduleBottomUp();
386 dbgs() <<
"*** Final schedule ***\n";
398 void ScheduleDAGRRList::ReleasePred(
SUnit *SU,
const SDep *PredEdge) {
403 dbgs() <<
"*** Scheduling failed! ***\n";
405 dbgs() <<
" has been released too many times!\n";
411 if (!forceUnitLatencies()) {
423 if (Height < MinAvailableCycle)
424 MinAvailableCycle = Height;
426 if (isReady(PredSU)) {
427 AvailableQueue->
push(PredSU);
433 PendingQueue.push_back(PredSU);
457 if (
N->isMachineOpcode()) {
458 if (
N->getMachineOpcode() ==
TII->getCallFrameDestroyOpcode()) {
460 }
else if (
N->getMachineOpcode() ==
TII->getCallFrameSetupOpcode()) {
470 goto found_chain_operand;
473 found_chain_operand:;
497 unsigned BestMaxNest = MaxNest;
499 unsigned MyNestLevel = NestLevel;
500 unsigned MyMaxNest = MaxNest;
502 MyNestLevel, MyMaxNest,
TII))
503 if (!Best || (MyMaxNest > BestMaxNest)) {
505 BestMaxNest = MyMaxNest;
509 MaxNest = BestMaxNest;
513 if (
N->isMachineOpcode()) {
514 if (
N->getMachineOpcode() ==
TII->getCallFrameDestroyOpcode()) {
516 MaxNest =
std::max(MaxNest, NestLevel);
517 }
else if (
N->getMachineOpcode() ==
TII->getCallFrameSetupOpcode()) {
528 goto found_chain_operand;
531 found_chain_operand:;
554 void ScheduleDAGRRList::ReleasePredecessors(
SUnit *SU) {
557 ReleasePred(SU, &Pred);
563 SUnit *RegDef = LiveRegDefs[Pred.
getReg()]; (void)RegDef;
565 "interference on register dependence");
567 if (!LiveRegGens[Pred.
getReg()]) {
569 LiveRegGens[Pred.
getReg()] = SU;
578 if (!LiveRegDefs[CallResource])
579 for (
SDNode *Node = SU->
getNode(); Node; Node = Node->getGluedNode())
580 if (Node->isMachineOpcode() &&
581 Node->getMachineOpcode() ==
TII->getCallFrameDestroyOpcode()) {
582 unsigned NestLevel = 0;
583 unsigned MaxNest = 0;
585 assert(
N &&
"Must find call sequence start");
588 CallSeqEndForStart[
Def] = SU;
591 LiveRegDefs[CallResource] =
Def;
592 LiveRegGens[CallResource] = SU;
599 void ScheduleDAGRRList::ReleasePending() {
601 assert(PendingQueue.empty() &&
"pending instrs not allowed in this mode");
606 if (AvailableQueue->
empty())
611 for (
unsigned i = 0,
e = PendingQueue.size();
i !=
e; ++
i) {
612 unsigned ReadyCycle = PendingQueue[
i]->getHeight();
613 if (ReadyCycle < MinAvailableCycle)
614 MinAvailableCycle = ReadyCycle;
617 if (!isReady(PendingQueue[
i]))
619 AvailableQueue->
push(PendingQueue[
i]);
621 PendingQueue[
i]->isPending =
false;
622 PendingQueue[
i] = PendingQueue.back();
623 PendingQueue.pop_back();
629 void ScheduleDAGRRList::AdvanceToCycle(
unsigned NextCycle) {
630 if (NextCycle <= CurCycle)
637 CurCycle = NextCycle;
640 for (; CurCycle != NextCycle; ++CurCycle) {
651 void ScheduleDAGRRList::AdvancePastStalls(
SUnit *SU) {
668 AdvanceToCycle(ReadyCycle);
688 AdvanceToCycle(CurCycle + Stalls);
693 void ScheduleDAGRRList::EmitNode(
SUnit *SU) {
704 "This target-independent node should not be scheduled.");
736 void ScheduleDAGRRList::ScheduleNodeBottomUp(
SUnit *SU) {
741 if (CurCycle < SU->getHeight())
743 <<
"] pipeline stall!\n");
763 AdvanceToCycle(CurCycle + 1);
767 ReleasePredecessors(SU);
773 assert(NumLiveRegs > 0 &&
"NumLiveRegs is already zero!");
775 LiveRegDefs[Succ.
getReg()] =
nullptr;
776 LiveRegGens[Succ.
getReg()] =
nullptr;
777 releaseInterferences(Succ.
getReg());
783 if (LiveRegDefs[CallResource] == SU)
786 if (SUNode->isMachineOpcode() &&
787 SUNode->getMachineOpcode() ==
TII->getCallFrameSetupOpcode()) {
788 assert(NumLiveRegs > 0 &&
"NumLiveRegs is already zero!");
790 LiveRegDefs[CallResource] =
nullptr;
791 LiveRegGens[CallResource] =
nullptr;
792 releaseInterferences(CallResource);
813 AdvanceToCycle(CurCycle + 1);
820 void ScheduleDAGRRList::CapturePred(
SDep *PredEdge) {
825 AvailableQueue->
remove(PredSU);
829 "NumSuccsLeft will overflow!");
835 void ScheduleDAGRRList::UnscheduleNodeBottomUp(
SUnit *SU) {
842 assert(NumLiveRegs > 0 &&
"NumLiveRegs is already zero!");
844 "Physical register dependency violated?");
846 LiveRegDefs[Pred.
getReg()] =
nullptr;
847 LiveRegGens[Pred.
getReg()] =
nullptr;
848 releaseInterferences(Pred.
getReg());
857 if (SUNode->isMachineOpcode() &&
858 SUNode->getMachineOpcode() ==
TII->getCallFrameSetupOpcode()) {
859 SUnit *SeqEnd = CallSeqEndForStart[SU];
860 assert(SeqEnd &&
"Call sequence start/end must be known");
861 assert(!LiveRegDefs[CallResource]);
862 assert(!LiveRegGens[CallResource]);
864 LiveRegDefs[CallResource] = SU;
865 LiveRegGens[CallResource] = SeqEnd;
871 if (LiveRegGens[CallResource] == SU)
874 if (SUNode->isMachineOpcode() &&
875 SUNode->getMachineOpcode() ==
TII->getCallFrameDestroyOpcode()) {
876 assert(NumLiveRegs > 0 &&
"NumLiveRegs is already zero!");
877 assert(LiveRegDefs[CallResource]);
878 assert(LiveRegGens[CallResource]);
880 LiveRegDefs[CallResource] =
nullptr;
881 LiveRegGens[CallResource] =
nullptr;
882 releaseInterferences(CallResource);
886 for (
auto &Succ : SU->
Succs) {
889 if (!LiveRegDefs[
Reg])
893 LiveRegDefs[
Reg] = SU;
897 if (!LiveRegGens[
Reg]) {
900 for (
auto &Succ2 : SU->
Succs) {
901 if (Succ2.isAssignedRegDep() && Succ2.getReg() ==
Reg &&
902 Succ2.getSUnit()->getHeight() < LiveRegGens[
Reg]->getHeight())
903 LiveRegGens[
Reg] = Succ2.getSUnit();
917 PendingQueue.push_back(SU);
920 AvailableQueue->
push(SU);
927 void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
935 std::vector<SUnit *>::const_iterator
I = (
Sequence.end() - LookAhead);
936 unsigned HazardCycle = (*I)->getHeight();
939 for (; SU->
getHeight() > HazardCycle; ++HazardCycle) {
948 void ScheduleDAGRRList::BacktrackBottomUp(
SUnit *SU,
SUnit *BtSU) {
954 UnscheduleNodeBottomUp(OldSU);
963 RestoreHazardCheckerBottomUp();
973 if (SUNode->isOperandOf(
N))
980 SUnit *ScheduleDAGRRList::TryUnfoldSU(
SUnit *SU) {
984 if (!
TII->unfoldMemoryOperand(*DAG,
N, NewNodes))
989 if (NewNodes.size() == 3)
992 assert(NewNodes.size() == 2 &&
"Expected a load folding node!");
995 SDNode *LoadNode = NewNodes[0];
996 unsigned NumVals =
N->getNumValues();
1002 bool isNewLoad =
true;
1005 LoadSU = &SUnits[LoadNode->
getNodeId()];
1012 LoadSU = CreateNewSUnit(LoadNode);
1015 InitNumRegDefsLeft(LoadSU);
1016 computeLatency(LoadSU);
1022 if (
N->getNodeId() != -1) {
1023 NewSU = &SUnits[
N->getNodeId()];
1031 NewSU = CreateNewSUnit(
N);
1044 InitNumRegDefsLeft(NewSU);
1045 computeLatency(NewSU);
1051 for (
unsigned i = 0;
i != NumVals; ++
i)
1053 DAG->ReplaceAllUsesOfValueWith(
SDValue(SU->
getNode(), OldNumVals - 1),
1064 ChainPreds.push_back(Pred);
1066 LoadPreds.push_back(Pred);
1068 NodePreds.push_back(Pred);
1072 ChainSuccs.push_back(Succ);
1074 NodeSuccs.push_back(Succ);
1078 for (
const SDep &Pred : ChainPreds) {
1079 RemovePred(SU, Pred);
1081 AddPredQueued(LoadSU, Pred);
1083 for (
const SDep &Pred : LoadPreds) {
1084 RemovePred(SU, Pred);
1086 AddPredQueued(LoadSU, Pred);
1088 for (
const SDep &Pred : NodePreds) {
1089 RemovePred(SU, Pred);
1090 AddPredQueued(NewSU, Pred);
1092 for (
SDep D : NodeSuccs) {
1093 SUnit *SuccDep =
D.getSUnit();
1095 RemovePred(SuccDep,
D);
1097 AddPredQueued(SuccDep,
D);
1103 for (
SDep D : ChainSuccs) {
1104 SUnit *SuccDep =
D.getSUnit();
1106 RemovePred(SuccDep,
D);
1109 AddPredQueued(SuccDep,
D);
1117 AddPredQueued(NewSU,
D);
1120 AvailableQueue->
addNode(LoadSU);
1122 AvailableQueue->
addNode(NewSU);
1134 SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(
SUnit *SU) {
1142 if (
N->getGluedNode() &&
1143 !
TII->canCopyGluedNodeDuringSchedule(
N)) {
1146 <<
"Giving up because it has incoming glue and the target does not "
1147 "want to copy it\n");
1152 bool TryUnfold =
false;
1153 for (
unsigned i = 0,
e =
N->getNumValues();
i !=
e; ++
i) {
1154 MVT VT =
N->getSimpleValueType(
i);
1156 LLVM_DEBUG(
dbgs() <<
"Giving up because it has outgoing glue\n");
1162 MVT VT =
Op.getNode()->getSimpleValueType(
Op.getResNo());
1163 if (VT ==
MVT::Glue && !
TII->canCopyGluedNodeDuringSchedule(
N)) {
1165 dbgs() <<
"Giving up because it one of the operands is glue and "
1166 "the target does not want to copy it\n");
1173 SUnit *UnfoldSU = TryUnfoldSU(SU);
1184 NewSU = CreateClone(SU);
1189 AddPredQueued(NewSU, Pred);
1205 AddPredQueued(SuccSU,
D);
1207 DelDeps.push_back(std::make_pair(SuccSU,
D));
1210 for (
auto &DelDep : DelDeps)
1211 RemovePred(DelDep.first, DelDep.second);
1214 AvailableQueue->
addNode(NewSU);
1222 void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(
SUnit *SU,
unsigned Reg,
1226 SUnit *CopyFromSU = CreateNewSUnit(
nullptr);
1230 SUnit *CopyToSU = CreateNewSUnit(
nullptr);
1243 D.setSUnit(CopyToSU);
1244 AddPredQueued(SuccSU,
D);
1245 DelDeps.push_back(std::make_pair(SuccSU, Succ));
1254 for (
auto &DelDep : DelDeps)
1255 RemovePred(DelDep.first, DelDep.second);
1258 FromDep.setLatency(SU->
Latency);
1259 AddPredQueued(CopyFromSU, FromDep);
1261 ToDep.setLatency(CopyFromSU->
Latency);
1262 AddPredQueued(CopyToSU, ToDep);
1265 AvailableQueue->
addNode(CopyFromSU);
1266 AvailableQueue->
addNode(CopyToSU);
1267 Copies.push_back(CopyFromSU);
1268 Copies.push_back(CopyToSU);
1292 return N->getSimpleValueType(NumRes);
1301 const SDNode *Node =
nullptr) {
1305 if (!LiveRegDefs[*AliasI])
continue;
1308 if (LiveRegDefs[*AliasI] == SU)
continue;
1311 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1315 if (RegAdded.
insert(*AliasI).second) {
1316 LRegs.push_back(*AliasI);
1328 for (
unsigned i = 1,
e = LiveRegDefs.
size()-1;
i !=
e; ++
i) {
1329 if (!LiveRegDefs[
i])
continue;
1330 if (LiveRegDefs[
i] == SU)
continue;
1332 if (RegAdded.
insert(
i).second)
1340 if (
const auto *RegOp = dyn_cast<RegisterMaskSDNode>(
Op.getNode()))
1341 return RegOp->getRegMask();
1349 bool ScheduleDAGRRList::
1351 if (NumLiveRegs == 0)
1362 RegAdded, LRegs,
TRI);
1365 for (
SDNode *Node = SU->
getNode(); Node; Node = Node->getGluedNode()) {
1369 unsigned NumOps = Node->getNumOperands();
1370 if (Node->getOperand(NumOps-1).getValueType() ==
MVT::Glue)
1375 cast<ConstantSDNode>(Node->getOperand(
i))->getZExtValue();
1383 for (; NumVals; --NumVals, ++
i) {
1384 unsigned Reg = cast<RegisterSDNode>(Node->getOperand(
i))->getReg();
1395 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1396 if (
Reg.isPhysical()) {
1397 SDNode *SrcNode = Node->getOperand(2).getNode();
1403 if (!Node->isMachineOpcode())
1408 if (Node->getMachineOpcode() ==
TII->getCallFrameDestroyOpcode()) {
1411 if (LiveRegDefs[CallResource]) {
1412 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1416 RegAdded.
insert(CallResource).second)
1417 LRegs.push_back(CallResource);
1444 return !LRegs.empty();
1447 void ScheduleDAGRRList::releaseInterferences(
unsigned Reg) {
1449 for (
unsigned i = Interferences.size();
i > 0; --
i) {
1450 SUnit *SU = Interferences[
i-1];
1451 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1463 AvailableQueue->
push(SU);
1465 if (
i < Interferences.size())
1466 Interferences[
i-1] = Interferences.back();
1467 Interferences.pop_back();
1468 LRegsMap.
erase(LRegsPos);
1476 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1477 SUnit *CurSU = AvailableQueue->
empty() ? nullptr : AvailableQueue->
pop();
1478 auto FindAvailableNode = [&]() {
1481 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1487 std::pair<LRegsMapT::iterator, bool> LRegsPair =
1488 LRegsMap.insert(std::make_pair(CurSU, LRegs));
1489 if (LRegsPair.second) {
1491 Interferences.push_back(CurSU);
1496 LRegsPair.first->second = LRegs;
1498 CurSU = AvailableQueue->
pop();
1501 FindAvailableNode();
1513 for (
SUnit *TrySU : Interferences) {
1518 SUnit *BtSU =
nullptr;
1520 for (
unsigned Reg : LRegs) {
1521 if (LiveRegGens[
Reg]->getHeight() < LiveCycle) {
1522 BtSU = LiveRegGens[
Reg];
1526 if (!WillCreateCycle(TrySU, BtSU)) {
1528 BacktrackBottomUp(TrySU, BtSU);
1535 AvailableQueue->
remove(BtSU);
1538 <<
") to SU(" << TrySU->NodeNum <<
")\n");
1543 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1544 LLVM_DEBUG(
dbgs() <<
"TrySU not available; choosing node from queue\n");
1545 CurSU = AvailableQueue->
pop();
1549 AvailableQueue->
remove(TrySU);
1552 FindAvailableNode();
1564 SUnit *TrySU = Interferences[0];
1566 assert(LRegs.size() == 1 &&
"Can't handle this yet!");
1567 unsigned Reg = LRegs[0];
1581 SUnit *NewDef =
nullptr;
1583 NewDef = CopyAndMoveSuccessors(LRDef);
1584 if (!DestRC && !NewDef)
1590 InsertCopiesAndMoveSuccs(LRDef,
Reg, DestRC, RC,
Copies);
1592 <<
" to SU #" <<
Copies.front()->NodeNum <<
"\n");
1598 <<
" to SU #" << TrySU->
NodeNum <<
"\n");
1599 LiveRegDefs[
Reg] = NewDef;
1604 assert(CurSU &&
"Unable to resolve live physical register dependencies!");
1610 void ScheduleDAGRRList::ListScheduleBottomUp() {
1612 ReleasePredecessors(&ExitSU);
1615 if (!SUnits.empty()) {
1616 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1617 assert(RootSU->
Succs.empty() &&
"Graph root shouldn't have successors!");
1619 AvailableQueue->
push(RootSU);
1625 while (!AvailableQueue->
empty() || !Interferences.empty()) {
1627 AvailableQueue->
dump(
this));
1631 SUnit *SU = PickNodeToScheduleBottomUp();
1633 AdvancePastStalls(SU);
1635 ScheduleNodeBottomUp(SU);
1637 while (AvailableQueue->
empty() && !PendingQueue.empty()) {
1640 "MinAvailableCycle uninitialized");
1641 AdvanceToCycle(
std::max(CurCycle + 1, MinAvailableCycle));
1649 VerifyScheduledSequence(
true);
1655 class RegReductionPQBase;
1658 bool isReady(
SUnit* SU,
unsigned CurCycle)
const {
return true; }
1663 struct reverse_sort :
public queue_sort {
1666 reverse_sort(SF &sf) : SortFunc(sf) {}
1671 return SortFunc(
right, left);
1678 struct bu_ls_rr_sort :
public queue_sort {
1681 HasReadyFilter =
false
1684 RegReductionPQBase *SPQ;
1686 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1692 struct src_ls_rr_sort :
public queue_sort {
1695 HasReadyFilter =
false
1698 RegReductionPQBase *SPQ;
1700 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1706 struct hybrid_ls_rr_sort :
public queue_sort {
1709 HasReadyFilter =
false
1712 RegReductionPQBase *SPQ;
1714 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1716 bool isReady(
SUnit *SU,
unsigned CurCycle)
const;
1723 struct ilp_ls_rr_sort :
public queue_sort {
1726 HasReadyFilter =
false
1729 RegReductionPQBase *SPQ;
1731 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1733 bool isReady(
SUnit *SU,
unsigned CurCycle)
const;
1740 std::vector<SUnit *>
Queue;
1741 unsigned CurQueueId = 0;
1742 bool TracksRegPressure;
1746 std::vector<SUnit> *SUnits;
1752 ScheduleDAGRRList *scheduleDAG =
nullptr;
1755 std::vector<unsigned> SethiUllmanNumbers;
1762 std::vector<unsigned> RegLimit;
1766 bool hasReadyFilter,
1773 SrcOrder(srcorder), MF(mf),
TII(tii),
TRI(tri), TLI(tli) {
1774 if (TracksRegPressure) {
1776 RegLimit.resize(NumRC);
1778 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1785 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1786 scheduleDAG = scheduleDag;
1790 return scheduleDAG->getHazardRec();
1793 void initNodes(std::vector<SUnit> &sunits)
override;
1795 void addNode(
const SUnit *SU)
override;
1797 void updateNode(
const SUnit *SU)
override;
1799 void releaseState()
override {
1801 SethiUllmanNumbers.clear();
1805 unsigned getNodePriority(
const SUnit *SU)
const;
1807 unsigned getNodeOrdering(
const SUnit *SU)
const {
1813 bool empty()
const override {
return Queue.empty(); }
1824 std::vector<SUnit *>::iterator
I =
llvm::find(Queue, SU);
1825 if (
I != std::prev(
Queue.end()))
1831 bool tracksRegPressure()
const override {
return TracksRegPressure; }
1833 void dumpRegPressure()
const;
1835 bool HighRegPressure(
const SUnit *SU)
const;
1837 bool MayReduceRegPressure(
SUnit *SU)
const;
1839 int RegPressureDiff(
SUnit *SU,
unsigned &LiveUses)
const;
1841 void scheduledNode(
SUnit *SU)
override;
1843 void unscheduledNode(
SUnit *SU)
override;
1847 void AddPseudoTwoAddrDeps();
1848 void PrescheduleNodesWithMultipleUses();
1849 void CalculateSethiUllmanNumbers();
1853 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1854 unsigned BestIdx = 0;
1857 for (
unsigned I = 1,
E =
std::min(Q.size(), (decltype(Q.size()))1000);
I !=
E;
1859 if (Picker(Q[BestIdx], Q[
I]))
1861 SUnit *V = Q[BestIdx];
1862 if (BestIdx + 1 != Q.size())
1869 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker,
ScheduleDAG *DAG) {
1872 reverse_sort<SF> RPicker(Picker);
1873 return popFromQueueImpl(Q, RPicker);
1877 return popFromQueueImpl(Q, Picker);
1888 class RegReductionPriorityQueue :
public RegReductionPQBase {
1898 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1902 bool isBottomUp()
const override {
return SF::IsBottomUp; }
1904 bool isReady(
SUnit *U)
const override {
1905 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1908 SUnit *pop()
override {
1909 if (
Queue.empty())
return nullptr;
1911 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1916 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1919 std::vector<SUnit *> DumpQueue =
Queue;
1920 SF DumpPicker = Picker;
1921 while (!DumpQueue.empty()) {
1922 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1930 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1931 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1932 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1933 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1949 bool RSchedLow =
right->isScheduleLow;
1950 if (LSchedLow != RSchedLow)
1951 return LSchedLow < RSchedLow ? 1 : -1;
1959 if (SUNumbers[SU->
NodeNum] != 0)
1960 return SUNumbers[SU->
NodeNum];
1964 WorkState(
const SUnit *SU) : SU(SU) {}
1966 unsigned PredsProcessed = 0;
1970 WorkList.push_back(SU);
1971 while (!WorkList.empty()) {
1972 auto &Temp = WorkList.back();
1973 auto *TempSU = Temp.SU;
1974 bool AllPredsKnown =
true;
1976 for (
unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++
P) {
1977 auto &Pred = TempSU->Preds[
P];
1978 if (Pred.
isCtrl())
continue;
1980 if (SUNumbers[PredSU->
NodeNum] == 0) {
1983 for (
auto It : WorkList)
1984 assert(It.SU != PredSU &&
"Trying to push an element twice?");
1987 Temp.PredsProcessed =
P + 1;
1988 WorkList.push_back(PredSU);
1989 AllPredsKnown =
false;
1998 unsigned SethiUllmanNumber = 0;
2000 for (
const SDep &Pred : TempSU->Preds) {
2001 if (Pred.
isCtrl())
continue;
2003 unsigned PredSethiUllman = SUNumbers[PredSU->
NodeNum];
2004 assert(PredSethiUllman > 0 &&
"We should have evaluated this pred!");
2005 if (PredSethiUllman > SethiUllmanNumber) {
2006 SethiUllmanNumber = PredSethiUllman;
2008 }
else if (PredSethiUllman == SethiUllmanNumber)
2012 SethiUllmanNumber += Extra;
2013 if (SethiUllmanNumber == 0)
2014 SethiUllmanNumber = 1;
2015 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2016 WorkList.pop_back();
2019 assert(SUNumbers[SU->
NodeNum] > 0 &&
"SethiUllman should never be zero!");
2020 return SUNumbers[SU->
NodeNum];
2025 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2026 SethiUllmanNumbers.assign(SUnits->size(), 0);
2028 for (
const SUnit &SU : *SUnits)
2032 void RegReductionPQBase::addNode(
const SUnit *SU) {
2033 unsigned SUSize = SethiUllmanNumbers.size();
2034 if (SUnits->size() > SUSize)
2035 SethiUllmanNumbers.resize(SUSize*2, 0);
2039 void RegReductionPQBase::updateNode(
const SUnit *SU) {
2040 SethiUllmanNumbers[SU->
NodeNum] = 0;
2046 unsigned RegReductionPQBase::getNodePriority(
const SUnit *SU)
const {
2053 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2054 Opc == TargetOpcode::SUBREG_TO_REG ||
2055 Opc == TargetOpcode::INSERT_SUBREG)
2071 return SethiUllmanNumbers[SU->
NodeNum];
2073 unsigned Priority = SethiUllmanNumbers[SU->
NodeNum];
2077 return (NP > 0) ? NP : 0;
2087 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2094 << RegLimit[
Id] <<
'\n');
2099 bool RegReductionPQBase::HighRegPressure(
const SUnit *SU)
const {
2113 RegDefPos.
IsValid(); RegDefPos.Advance()) {
2114 unsigned RCId, Cost;
2124 bool RegReductionPQBase::MayReduceRegPressure(
SUnit *SU)
const {
2127 if (!
N->isMachineOpcode() || !SU->
NumSuccs)
2130 unsigned NumDefs =
TII->get(
N->getMachineOpcode()).getNumDefs();
2131 for (
unsigned i = 0;
i != NumDefs; ++
i) {
2132 MVT VT =
N->getSimpleValueType(
i);
2133 if (!
N->hasAnyUseOfValue(
i))
2149 int RegReductionPQBase::RegPressureDiff(
SUnit *SU,
unsigned &LiveUses)
const {
2164 RegDefPos.
IsValid(); RegDefPos.Advance()) {
2165 MVT VT = RegDefPos.GetValue();
2173 if (!
N || !
N->isMachineOpcode() || !SU->
NumSuccs)
2176 unsigned NumDefs =
TII->get(
N->getMachineOpcode()).getNumDefs();
2177 for (
unsigned i = 0;
i != NumDefs; ++
i) {
2178 MVT VT =
N->getSimpleValueType(
i);
2179 if (!
N->hasAnyUseOfValue(
i))
2188 void RegReductionPQBase::scheduledNode(
SUnit *SU) {
2189 if (!TracksRegPressure)
2222 RegDefPos.
IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2226 unsigned RCId, Cost;
2238 RegDefPos.
IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2239 if (SkipRegDefs > 0)
2241 unsigned RCId, Cost;
2247 <<
") has too many regdefs\n");
2257 void RegReductionPQBase::unscheduledNode(
SUnit *SU) {
2258 if (!TracksRegPressure)
2264 if (!
N->isMachineOpcode()) {
2268 unsigned Opc =
N->getMachineOpcode();
2269 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2270 Opc == TargetOpcode::INSERT_SUBREG ||
2271 Opc == TargetOpcode::SUBREG_TO_REG ||
2272 Opc == TargetOpcode::REG_SEQUENCE ||
2273 Opc == TargetOpcode::IMPLICIT_DEF)
2295 if (POpc == TargetOpcode::IMPLICIT_DEF)
2297 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2298 POpc == TargetOpcode::INSERT_SUBREG ||
2299 POpc == TargetOpcode::SUBREG_TO_REG) {
2306 for (
unsigned i = 0;
i != NumDefs; ++
i) {
2321 if (SU->
NumSuccs &&
N->isMachineOpcode()) {
2322 unsigned NumDefs =
TII->get(
N->getMachineOpcode()).getNumDefs();
2323 for (
unsigned i = NumDefs,
e =
N->getNumValues();
i !=
e; ++
i) {
2324 MVT VT =
N->getSimpleValueType(
i);
2327 if (!
N->hasAnyUseOfValue(
i))
2344 unsigned MaxHeight = 0;
2346 if (Succ.
isCtrl())
continue;
2353 if (Height > MaxHeight)
2362 unsigned Scratches = 0;
2364 if (Pred.
isCtrl())
continue;
2373 bool RetVal =
false;
2375 if (Pred.
isCtrl())
continue;
2395 bool RetVal =
false;
2397 if (Succ.
isCtrl())
continue;
2434 if (Pred.
isCtrl())
continue;
2446 if (Pred.
isCtrl())
continue;
2450 "VRegCycle def must be CopyFromReg");
2464 if (Pred.
isCtrl())
continue;
2478 if ((
int)SPQ->getCurCycle() < Height)
return true;
2479 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2488 RegReductionPQBase *SPQ) {
2494 int RHeight = (
int)
right->getHeight() + RPenalty;
2507 if (LHeight != RHeight)
2508 return LHeight > RHeight ? 1 : -1;
2520 if (!SPQ->getHazardRec()->isEnabled()) {
2521 if (LHeight != RHeight)
2522 return LHeight > RHeight ? 1 : -1;
2524 int LDepth = left->
getDepth() - LPenalty;
2525 int RDepth =
right->getDepth() - RPenalty;
2526 if (LDepth != RDepth) {
2528 <<
") depth " << LDepth <<
" vs SU (" <<
right->NodeNum
2529 <<
") depth " << RDepth <<
"\n");
2530 return LDepth < RDepth ? 1 : -1;
2545 bool RHasPhysReg =
right->hasPhysRegDefs;
2546 if (LHasPhysReg != RHasPhysReg) {
2548 static const char *
const PhysRegMsg[] = {
" has no physreg",
2549 " defines a physreg" };
2552 << PhysRegMsg[LHasPhysReg] <<
" SU(" <<
right->NodeNum
2553 <<
") " << PhysRegMsg[RHasPhysReg] <<
"\n");
2554 return LHasPhysReg < RHasPhysReg;
2559 unsigned LPriority = SPQ->getNodePriority(left);
2560 unsigned RPriority = SPQ->getNodePriority(
right);
2565 unsigned RNumVals =
right->getNode()->getNumValues();
2566 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2570 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2573 if (LPriority != RPriority)
2574 return LPriority > RPriority;
2579 unsigned LOrder = SPQ->getNodeOrdering(left);
2580 unsigned ROrder = SPQ->getNodeOrdering(
right);
2584 if ((LOrder || ROrder) && LOrder != ROrder)
2585 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2608 return LDist < RDist;
2613 if (LScratch != RScratch)
2614 return LScratch > RScratch;
2618 if ((left->
isCall && RPriority > 0) || (
right->isCall && LPriority > 0))
2637 "NodeQueueId cannot be zero");
2654 unsigned LOrder = SPQ->getNodeOrdering(left);
2655 unsigned ROrder = SPQ->getNodeOrdering(
right);
2659 if ((LOrder || ROrder) && LOrder != ROrder)
2660 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2669 bool hybrid_ls_rr_sort::isReady(
SUnit *SU,
unsigned CurCycle)
const {
2670 static const unsigned ReadyDelay = 3;
2672 if (SPQ->MayReduceRegPressure(SU))
return true;
2674 if (SU->
getHeight() > (CurCycle + ReadyDelay))
return false;
2676 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2692 bool LHigh = SPQ->HighRegPressure(left);
2693 bool RHigh = SPQ->HighRegPressure(
right);
2696 if (LHigh && !RHigh) {
2698 <<
right->NodeNum <<
")\n");
2701 else if (!LHigh && RHigh) {
2706 if (!LHigh && !RHigh) {
2716 bool ilp_ls_rr_sort::isReady(
SUnit *SU,
unsigned CurCycle)
const {
2717 if (SU->
getHeight() > CurCycle)
return false;
2719 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2733 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2734 Opc == TargetOpcode::SUBREG_TO_REG ||
2735 Opc == TargetOpcode::INSERT_SUBREG)
2758 unsigned LLiveUses = 0, RLiveUses = 0;
2759 int LPDiff = 0, RPDiff = 0;
2761 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2762 RPDiff = SPQ->RegPressureDiff(
right, RLiveUses);
2766 <<
"): " << LPDiff <<
" != SU(" <<
right->NodeNum
2767 <<
"): " << RPDiff <<
"\n");
2768 return LPDiff > RPDiff;
2774 if (LReduce && !RReduce)
return false;
2775 if (RReduce && !LReduce)
return true;
2780 <<
" != SU(" <<
right->NodeNum <<
"): " << RLiveUses
2782 return LLiveUses < RLiveUses;
2788 if (LStall != RStall)
2797 <<
"): " <<
right->getDepth() <<
"\n");
2811 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2815 AddPseudoTwoAddrDeps();
2817 if (!TracksRegPressure && !SrcOrder)
2818 PrescheduleNodesWithMultipleUses();
2820 CalculateSethiUllmanNumbers();
2823 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2824 for (
SUnit &SU : sunits)
2832 bool RegReductionPQBase::canClobber(
const SUnit *SU,
const SUnit *
Op) {
2838 for (
unsigned i = 0;
i != NumOps; ++
i) {
2854 ScheduleDAGRRList *scheduleDAG,
2860 if(!ImpDefs && !RegMask)
2865 for (
const SDep &SuccPred : SuccSU->
Preds) {
2871 scheduleDAG->IsReachable(DepSU, SuccPred.
getSUnit()))
2875 for (
const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2880 scheduleDAG->IsReachable(DepSU, SuccPred.
getSUnit()))
2893 unsigned NumDefs =
TII->get(
N->getMachineOpcode()).getNumDefs();
2894 const MCPhysReg *ImpDefs =
TII->get(
N->getMachineOpcode()).getImplicitDefs();
2895 assert(ImpDefs &&
"Caller should check hasPhysRegDefs");
2898 if (!SUNode->isMachineOpcode())
2901 TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2903 if (!SUImpDefs && !SURegMask)
2905 for (
unsigned i = NumDefs,
e =
N->getNumValues();
i !=
e; ++
i) {
2906 MVT VT =
N->getSimpleValueType(
i);
2909 if (!
N->hasAnyUseOfValue(
i))
2911 unsigned Reg = ImpDefs[
i - NumDefs];
2916 for (;*SUImpDefs; ++SUImpDefs) {
2917 unsigned SUReg = *SUImpDefs;
2956 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2958 for (
SUnit &SU : *SUnits) {
2972 cast<RegisterSDNode>(
N->getOperand(1))->getReg()))
2975 SDNode *PredFrameSetup =
nullptr;
2990 PredFrameSetup = PredND;
2995 if (PredFrameSetup !=
nullptr)
2999 SUnit *PredSU =
nullptr;
3019 cast<RegisterSDNode>(
N->getOperand(1))->getReg()))
3023 for (
const SDep &PredSucc : PredSU->
Succs) {
3025 if (PredSuccSU == &SU)
continue;
3029 goto outer_loop_continue;
3033 goto outer_loop_continue;
3035 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3036 goto outer_loop_continue;
3042 dbgs() <<
" Prescheduling SU #" << SU.
NodeNum <<
" next to PredSU #"
3044 <<
" to guide scheduling in the presence of multiple uses\n");
3045 for (
unsigned i = 0;
i != PredSU->
Succs.size(); ++
i) {
3049 if (SuccSU != &SU) {
3051 scheduleDAG->RemovePred(SuccSU, Edge);
3052 scheduleDAG->AddPredQueued(&SU, Edge);
3054 scheduleDAG->AddPredQueued(SuccSU, Edge);
3058 outer_loop_continue:;
3069 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3070 for (
SUnit &SU : *SUnits) {
3079 unsigned Opc = Node->getMachineOpcode();
3083 for (
unsigned j = 0;
j != NumOps; ++
j) {
3107 while (SuccSU->
Succs.size() == 1 &&
3110 TargetOpcode::COPY_TO_REGCLASS)
3111 SuccSU = SuccSU->
Succs.front().getSUnit();
3124 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3125 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3126 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3129 (!canClobber(SuccSU, DUSU) ||
3132 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3134 <<
" Adding a pseudo-two-addr edge from SU #"
3154 BURegReductionPriorityQueue *PQ =
3155 new BURegReductionPriorityQueue(*IS->
MF,
false,
false,
TII,
TRI,
nullptr);
3156 ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->
MF,
false, PQ, OptLevel);
3157 PQ->setScheduleDAG(SD);
3168 SrcRegReductionPriorityQueue *PQ =
3169 new SrcRegReductionPriorityQueue(*IS->
MF,
false,
true,
TII,
TRI,
nullptr);
3170 ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->
MF,
false, PQ, OptLevel);
3171 PQ->setScheduleDAG(SD);
3183 HybridBURRPriorityQueue *PQ =
3184 new HybridBURRPriorityQueue(*IS->
MF,
true,
false,
TII,
TRI, TLI);
3186 ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->
MF,
true, PQ, OptLevel);
3187 PQ->setScheduleDAG(SD);
3199 ILPBURRPriorityQueue *PQ =
3200 new ILPBURRPriorityQueue(*IS->
MF,
true,
false,
TII,
TRI, TLI);
3201 ScheduleDAGRRList *SD =
new ScheduleDAGRRList(*IS->
MF,
true, PQ, OptLevel);
3202 PQ->setScheduleDAG(SD);