22 #define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
144 if (TryVal < CandVal) {
148 if (TryVal > CandVal) {
161 if (TryVal > CandVal) {
165 if (TryVal < CandVal) {
179 NodeNum2Index[SU->
NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
184 void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
191 void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
194 if (!Cand.isValid()) {
199 if (Cand.SGPRUsage > 60 &&
220 Cand.HasLowLatencyNonWaitedParent,
228 if (TryCand.IsLowLatency &&
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
243 SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
246 for (
SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason !=
NoCand)
261 TopCand.setBest(TryCand);
274 for (
SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
297 if (
MI->isDebugValue())
300 if (InstSlot >=
First && InstSlot <= Last)
318 for (
SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
324 RPTracker.closeRegion();
327 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
331 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
333 LiveInRegs.insert(RegMaskPair.RegUnit);
358 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
360 if (
Reg.isVirtual() &&
364 LiveOutRegs.insert(
Reg);
385 initRegPressure(BeginBlock, EndBlock);
392 for (
SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
406 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
410 assert(SUnits.size() == ScheduledSUnits.size() &&
411 TopReadySUs.empty());
412 for (
SUnit* SU : SUnits) {
414 SU->NumPredsLeft == 0);
421 void SIScheduleBlock::undoSchedule() {
422 for (
SUnit* SU : SUnits) {
423 SU->isScheduled =
false;
424 for (
SDep& Succ : SU->Succs) {
426 undoReleaseSucc(SU, &Succ);
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
434 void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
444 void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
453 dbgs() <<
"*** Scheduling failed! ***\n";
455 dbgs() <<
" has been released too many times!\n";
464 void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
474 releaseSucc(SU, &Succ);
476 TopReadySUs.push_back(SuccSU);
480 void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
483 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
484 if (
I == TopReadySUs.end()) {
485 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
488 TopReadySUs.erase(
I);
490 releaseSuccessors(SU,
true);
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
498 std::map<unsigned, unsigned>::iterator
I =
500 if (
I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[
I->second] = 1;
509 for (
SUnit* SU : SUnits) {
510 releaseSuccessors(SU,
false);
512 HighLatencyBlock =
true;
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
519 unsigned PredID = Pred->
getID();
523 if (PredID ==
P->getID())
526 Preds.push_back(Pred);
531 return PredID ==
S.first->getID();
533 "Loop in the Block Graph!");
538 unsigned SuccID = Succ->
getID();
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &
S : Succs) {
542 if (SuccID ==
S.first->getID()) {
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::make_pair(Succ,
Kind));
555 "Loop in the Block Graph!");
560 dbgs() <<
"Block (" <<
ID <<
")\n";
564 dbgs() <<
"\nContains High Latency Instruction: "
565 << HighLatencyBlock <<
'\n';
566 dbgs() <<
"\nDepends On:\n";
568 P->printDebug(
false);
571 dbgs() <<
"\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>
S : Succs) {
574 dbgs() <<
"(Data Dep) ";
575 S.first->printDebug(
false);
579 dbgs() <<
"LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
582 dbgs() <<
"LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
585 dbgs() <<
"LiveIns:\n";
586 for (
unsigned Reg : LiveInRegs)
589 dbgs() <<
"\nLiveOuts:\n";
590 for (
unsigned Reg : LiveOutRegs)
594 dbgs() <<
"\nInstructions:\n";
595 for (
const SUnit* SU : SUnits)
598 dbgs() <<
"///////////////////////\n";
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
610 Blocks.find(BlockVariant);
611 if (
B == Blocks.end()) {
613 createBlocksForVariant(BlockVariant);
615 scheduleInsideBlocks();
617 Res.
Blocks = CurrentBlocks;
620 Blocks[BlockVariant] = Res;
630 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
633 void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->
SUnits.size();
636 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
639 CurrentColoring[SU->
NodeNum] = NextReservedID++;
646 for (
const auto &PredDep : SU.
Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
654 void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->
SUnits.size();
656 unsigned NumHighLatencies = 0;
658 int Color = NextReservedID;
660 std::set<unsigned> FormingGroup;
662 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
668 if (NumHighLatencies == 0)
671 if (NumHighLatencies <= 6)
673 else if (NumHighLatencies <= 12)
681 unsigned CompatibleGroup =
true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
695 for (
unsigned j : FormingGroup) {
697 std::vector<int> SubGraph;
710 else if (SubGraph.size() > 5) {
712 CompatibleGroup =
false;
717 for (
unsigned k : SubGraph) {
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup =
false;
731 CompatibleGroup =
false;
735 if (!CompatibleGroup)
739 CompatibleGroup =
false;
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.
NodeNum);
752 for (
unsigned j : AdditionalElements)
753 CurrentColoring[
j] = ProposedColor;
754 CurrentColoring[SU.
NodeNum] = ProposedColor;
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.
NodeNum);
765 CurrentColoring[SU.
NodeNum] = ProposedColor;
767 }
else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
777 void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->
SUnits.size();
779 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
792 std::set<unsigned> SUColors;
795 if (CurrentColoring[SU->
NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
805 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
809 if (SUColors.empty())
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
816 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
821 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
823 ColorCombinations[SUColors] = NextNonReservedID++;
828 ColorCombinations.clear();
834 std::set<unsigned> SUColors;
837 if (CurrentColoring[SU->
NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
847 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
851 if (SUColors.empty())
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
858 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
863 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
865 ColorCombinations[SUColors] = NextNonReservedID++;
871 void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
878 std::pair<unsigned, unsigned> SUColors;
881 if (CurrentColoring[SU.
NodeNum])
884 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
885 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
887 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
888 ColorCombinations.find(SUColors);
889 if (Pos != ColorCombinations.end()) {
890 CurrentColoring[SU.
NodeNum] = Pos->second;
892 CurrentColoring[SU.
NodeNum] = NextNonReservedID;
893 ColorCombinations[SUColors] = NextNonReservedID++;
898 void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
899 unsigned DAGSize = DAG->
SUnits.size();
900 std::vector<int> PendingColoring = CurrentColoring;
903 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
904 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
907 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
908 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
909 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
910 CurrentTopDownReservedDependencyColoring.end()) == 0)
915 std::set<unsigned> SUColors;
916 std::set<unsigned> SUColorsPending;
918 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
921 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
922 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
929 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
930 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
931 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
932 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
937 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
938 PendingColoring[SU->
NodeNum] = *SUColors.begin();
941 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
943 CurrentColoring = PendingColoring;
947 void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
948 unsigned DAGSize = DAG->
SUnits.size();
949 unsigned PreviousColor;
950 std::set<unsigned> SeenColors;
955 PreviousColor = CurrentColoring[0];
957 for (
unsigned i = 1,
e = DAGSize;
i !=
e; ++
i) {
959 unsigned CurrentColor = CurrentColoring[
i];
960 unsigned PreviousColorSave = PreviousColor;
963 if (CurrentColor != PreviousColor)
964 SeenColors.insert(PreviousColor);
965 PreviousColor = CurrentColor;
967 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
970 if (SeenColors.find(CurrentColor) == SeenColors.end())
973 if (PreviousColorSave != CurrentColor)
974 CurrentColoring[
i] = NextNonReservedID++;
976 CurrentColoring[
i] = CurrentColoring[
i-1];
980 void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
981 unsigned DAGSize = DAG->
SUnits.size();
985 std::set<unsigned> SUColors;
987 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
999 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1001 if (SUColors.size() == 1)
1002 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1006 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1007 unsigned DAGSize = DAG->
SUnits.size();
1011 std::set<unsigned> SUColors;
1013 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1020 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1022 if (SUColors.size() == 1)
1023 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1027 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1028 unsigned DAGSize = DAG->
SUnits.size();
1032 std::set<unsigned> SUColors;
1034 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1041 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1043 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1044 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1048 void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1049 unsigned DAGSize = DAG->
SUnits.size();
1050 std::map<unsigned, unsigned> ColorCount;
1054 unsigned color = CurrentColoring[SU->
NodeNum];
1055 ++ColorCount[color];
1060 unsigned color = CurrentColoring[SU->
NodeNum];
1061 std::set<unsigned> SUColors;
1063 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1066 if (ColorCount[color] > 1)
1073 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1075 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1076 --ColorCount[color];
1077 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1078 ++ColorCount[*SUColors.begin()];
1083 void SIScheduleBlockCreator::cutHugeBlocks() {
1087 void SIScheduleBlockCreator::regroupNoUserInstructions() {
1088 unsigned DAGSize = DAG->
SUnits.size();
1089 int GroupID = NextNonReservedID++;
1093 bool hasSuccessor =
false;
1095 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1102 hasSuccessor =
true;
1105 CurrentColoring[SU->
NodeNum] = GroupID;
1109 void SIScheduleBlockCreator::colorExports() {
1110 unsigned ExportColor = NextNonReservedID++;
1135 "SUnit unexpectedly not representing an instruction!");
1146 ExpGroup.push_back(SUNum);
1151 for (
unsigned j : ExpGroup)
1152 CurrentColoring[
j] = ExportColor;
1156 unsigned DAGSize = DAG->
SUnits.size();
1157 std::map<unsigned,unsigned> RealID;
1159 CurrentBlocks.clear();
1160 CurrentColoring.clear();
1161 CurrentColoring.resize(DAGSize, 0);
1162 Node2CurrentBlock.clear();
1168 NextNonReservedID = DAGSize + 1;
1173 colorHighLatenciesGroups();
1175 colorHighLatenciesAlone();
1176 colorComputeReservedDependencies();
1177 colorAccordingToReservedDependencies();
1178 colorEndsAccordingToDependencies();
1180 colorForceConsecutiveOrderInGroup();
1181 regroupNoUserInstructions();
1182 colorMergeConstantLoadsNextGroup();
1183 colorMergeIfPossibleNextGroupOnlyForReserved();
1187 Node2CurrentBlock.resize(DAGSize, -1);
1188 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1190 unsigned Color = CurrentColoring[SU->
NodeNum];
1191 if (RealID.find(Color) == RealID.end()) {
1192 int ID = CurrentBlocks.size();
1193 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1194 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1197 CurrentBlocks[RealID[Color]]->addUnit(SU);
1198 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1202 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1204 int SUID = Node2CurrentBlock[
i];
1209 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1210 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1217 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1218 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1224 Block->finalizeUnits();
1226 dbgs() <<
"Blocks created:\n\n";
1228 Block->printDebug(
true);
1238 for (;
I != End; ++
I) {
1239 if (!
I->isDebugInstr())
1245 void SIScheduleBlockCreator::topologicalSort() {
1246 unsigned DAGSize = CurrentBlocks.size();
1247 std::vector<int> WorkList;
1251 WorkList.reserve(DAGSize);
1252 TopDownIndex2Block.resize(DAGSize);
1253 TopDownBlock2Index.resize(DAGSize);
1254 BottomUpIndex2Block.resize(DAGSize);
1256 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1258 unsigned Degree =
Block->getSuccs().size();
1259 TopDownBlock2Index[
i] = Degree;
1261 WorkList.push_back(
i);
1266 while (!WorkList.empty()) {
1267 int i = WorkList.back();
1269 WorkList.pop_back();
1270 TopDownBlock2Index[
i] = --
Id;
1271 TopDownIndex2Block[
Id] =
i;
1273 if (!--TopDownBlock2Index[Pred->getID()])
1274 WorkList.push_back(Pred->getID());
1280 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1283 assert(TopDownBlock2Index[
i] > TopDownBlock2Index[Pred->getID()] &&
1284 "Wrong Top Down topological sorting");
1289 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1290 TopDownIndex2Block.rend());
1293 void SIScheduleBlockCreator::scheduleInsideBlocks() {
1294 unsigned DAGSize = CurrentBlocks.size();
1300 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1301 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1303 Block->fastSchedule();
1311 std::vector<MachineBasicBlock::iterator> PosOld;
1312 std::vector<MachineBasicBlock::iterator> PosNew;
1313 PosOld.reserve(DAG->
SUnits.size());
1314 PosNew.reserve(DAG->
SUnits.size());
1316 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1317 int BlockIndice = TopDownIndex2Block[
i];
1319 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1321 for (
SUnit* SU : SUs) {
1324 PosOld.push_back(Pos);
1325 if (&*CurrentTopFastSched ==
MI) {
1326 PosNew.push_back(Pos);
1327 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1339 PosNew.push_back(CurrentTopFastSched);
1348 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1350 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1351 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1356 for (
unsigned i = PosOld.size(),
e = 0;
i !=
e; --
i) {
1370 Block->printDebug(
true);
1374 void SIScheduleBlockCreator::fillStats() {
1375 unsigned DAGSize = CurrentBlocks.size();
1377 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1378 int BlockIndice = TopDownIndex2Block[
i];
1380 if (
Block->getPreds().empty())
1385 if (Depth < Pred->
Depth + Pred->getCost())
1386 Depth = Pred->Depth + Pred->getCost();
1392 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1393 int BlockIndice = BottomUpIndex2Block[
i];
1395 if (
Block->getSuccs().empty())
1398 unsigned Height = 0;
1399 for (
const auto &Succ :
Block->getSuccs())
1400 Height =
std::max(Height, Succ.first->Height + Succ.first->getCost());
1401 Block->Height = Height;
1411 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1412 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1413 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1425 LiveOutRegsNumUsages.resize(Blocks.size());
1427 for (
unsigned Reg : Block->getInRegs()) {
1431 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1432 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1434 if (RegPos != PredOutRegs.end()) {
1446 ++LiveOutRegsNumUsages[PredID][
Reg];
1450 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1451 BlockNumPredsLeft.resize(Blocks.size());
1452 BlockNumSuccsLeft.resize(Blocks.size());
1454 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1456 BlockNumPredsLeft[
i] = Block->getPreds().size();
1457 BlockNumSuccsLeft[
i] = Block->getSuccs().size();
1461 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1467 std::set<unsigned> InRegs = DAG->
getInRegs();
1468 addLiveRegs(InRegs);
1474 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1478 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1480 if (OutRegs.find(
Reg) == OutRegs.end())
1483 ++LiveOutRegsNumUsages[
ID][
Reg];
1491 for (
unsigned Reg : Block->getInRegs()) {
1494 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1495 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1497 if (RegPos != PredOutRegs.end()) {
1504 ++LiveRegsConsumers[
Reg];
1508 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1510 if (BlockNumPredsLeft[
i] == 0) {
1511 ReadyBlocks.push_back(Block);
1516 BlocksScheduled.push_back(Block);
1517 blockScheduled(Block);
1521 : BlocksScheduled) {
1522 dbgs() <<
' ' << Block->getID();
1526 bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1527 SIBlockSchedCandidate &TryCand) {
1528 if (!Cand.isValid()) {
1535 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1542 TryCand, Cand,
Depth))
1545 Cand.NumHighLatencySuccessors,
1551 bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1552 SIBlockSchedCandidate &TryCand) {
1553 if (!Cand.isValid()) {
1562 Cand.NumSuccessors > 0,
1574 SIBlockSchedCandidate Cand;
1575 std::vector<SIScheduleBlock*>::iterator Best;
1577 if (ReadyBlocks.empty())
1581 VregCurrentUsage, SregCurrentUsage);
1582 if (VregCurrentUsage > maxVregUsage)
1583 maxVregUsage = VregCurrentUsage;
1584 if (SregCurrentUsage > maxSregUsage)
1585 maxSregUsage = SregCurrentUsage;
1588 : ReadyBlocks)
dbgs()
1589 <<
Block->getID() <<
' ';
1590 dbgs() <<
"\nCurrent Live:\n";
1595 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1596 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1598 Cand.Block =
nullptr;
1599 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1600 E = ReadyBlocks.end();
I !=
E; ++
I) {
1601 SIBlockSchedCandidate TryCand;
1603 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1604 TryCand.VGPRUsageDiff =
1605 checkRegUsageImpact(TryCand.Block->getInRegs(),
1606 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1607 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1608 TryCand.NumHighLatencySuccessors =
1609 TryCand.Block->getNumHighLatencySuccessors();
1610 TryCand.LastPosHighLatParentScheduled =
1611 (
unsigned int) std::max<int> (0,
1612 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1613 LastPosWaitedHighLatency);
1614 TryCand.Height = TryCand.Block->Height;
1616 if (VregCurrentUsage > 120 ||
1618 if (!tryCandidateRegUsage(Cand, TryCand) &&
1620 tryCandidateLatency(Cand, TryCand);
1622 if (!tryCandidateLatency(Cand, TryCand))
1623 tryCandidateRegUsage(Cand, TryCand);
1625 if (TryCand.Reason !=
NoCand) {
1626 Cand.setBest(TryCand);
1628 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1634 dbgs() <<
"Is a block with high latency instruction: "
1635 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1636 dbgs() <<
"Position of last high latency dependency: "
1637 << Cand.LastPosHighLatParentScheduled <<
'\n';
1638 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1642 ReadyBlocks.erase(Best);
1648 void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1651 if (!
Reg.isVirtual())
1654 (void) LiveRegs.insert(
Reg);
1658 void SIScheduleBlockScheduler::decreaseLiveRegs(
SIScheduleBlock *Block,
1659 std::set<unsigned> &Regs) {
1660 for (
unsigned Reg : Regs) {
1662 std::set<unsigned>::iterator Pos = LiveRegs.find(
Reg);
1663 assert (Pos != LiveRegs.end() &&
1664 LiveRegsConsumers.find(
Reg) != LiveRegsConsumers.end() &&
1665 LiveRegsConsumers[
Reg] >= 1);
1666 --LiveRegsConsumers[
Reg];
1667 if (LiveRegsConsumers[
Reg] == 0)
1668 LiveRegs.erase(Pos);
1672 void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1673 for (
const auto &Block : Parent->
getSuccs()) {
1674 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1675 ReadyBlocks.push_back(
Block.first);
1679 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1683 void SIScheduleBlockScheduler::blockScheduled(
SIScheduleBlock *Block) {
1684 decreaseLiveRegs(Block,
Block->getInRegs());
1685 addLiveRegs(
Block->getOutRegs());
1686 releaseBlockSuccs(Block);
1687 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1689 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1690 LiveRegsConsumers[RegP.first] == 0);
1691 LiveRegsConsumers[RegP.first] += RegP.second;
1693 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1694 (
unsigned)LastPosWaitedHighLatency)
1695 LastPosWaitedHighLatency =
1696 LastPosHighLatencyParentScheduled[
Block->getID()];
1697 ++NumBlockScheduled;
1701 SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1702 std::set<unsigned> &OutRegs) {
1703 std::vector<int> DiffSetPressure;
1708 if (!
Reg.isVirtual())
1710 if (LiveRegsConsumers[
Reg] > 1)
1713 for (; PSetI.
isValid(); ++PSetI) {
1714 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1720 if (!
Reg.isVirtual())
1723 for (; PSetI.
isValid(); ++PSetI) {
1724 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1728 return DiffSetPressure;
1734 SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1735 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1738 std::vector<SIScheduleBlock*> ScheduledBlocks;
1741 ScheduledBlocks =
Scheduler.getBlocks();
1744 std::vector<SUnit*>
SUs = Block->getScheduledUnits();
1768 void SIScheduleDAGMI::topologicalSort() {
1781 void SIScheduleDAGMI::moveLowLatencies() {
1782 unsigned DAGSize =
SUnits.size();
1783 int LastLowLatencyUser = -1;
1784 int LastLowLatencyPos = -1;
1786 for (
unsigned i = 0,
e = ScheduledSUnits.size();
i !=
e; ++
i) {
1788 bool IsLowLatencyUser =
false;
1789 unsigned MinPos = 0;
1794 IsLowLatencyUser =
true;
1798 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1799 if (PredPos >= MinPos)
1800 MinPos = PredPos + 1;
1804 unsigned BestPos = LastLowLatencyUser + 1;
1805 if ((
int)BestPos <= LastLowLatencyPos)
1806 BestPos = LastLowLatencyPos + 1;
1807 if (BestPos < MinPos)
1810 for (
unsigned u =
i; u > BestPos; --u) {
1811 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1812 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1814 ScheduledSUnits[BestPos] = SU->
NodeNum;
1815 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1817 LastLowLatencyPos = BestPos;
1818 if (IsLowLatencyUser)
1819 LastLowLatencyUser = BestPos;
1820 }
else if (IsLowLatencyUser) {
1821 LastLowLatencyUser =
i;
1825 bool CopyForLowLat =
false;
1831 CopyForLowLat =
true;
1837 for (
unsigned u =
i; u > MinPos; --u) {
1838 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1839 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1841 ScheduledSUnits[MinPos] = SU->
NodeNum;
1842 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1849 for (
unsigned i = 0,
e =
SUnits.size();
i !=
e; ++
i) {
1850 SUnits[
i].isScheduled =
false;
1851 SUnits[
i].WeakPredsLeft = SUnitsLinksBackup[
i].WeakPredsLeft;
1852 SUnits[
i].NumPredsLeft = SUnitsLinksBackup[
i].NumPredsLeft;
1853 SUnits[
i].WeakSuccsLeft = SUnitsLinksBackup[
i].WeakSuccsLeft;
1854 SUnits[
i].NumSuccsLeft = SUnitsLinksBackup[
i].NumSuccsLeft;
1859 template<
typename _Iterator>
void
1861 unsigned &VgprUsage,
unsigned &SgprUsage) {
1864 for (_Iterator RegI =
First; RegI != End; ++RegI) {
1867 if (!
Reg.isVirtual())
1870 for (; PSetI.
isValid(); ++PSetI) {
1871 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1873 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1899 SUnitsLinksBackup =
SUnits;
1908 for (
unsigned i = 0,
e = (
unsigned)
SUnits.size();
i !=
e; ++
i) {
1914 bool OffsetIsScalable;
1915 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1916 OffsetIsScalable,
TRI))
1941 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1942 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1962 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1963 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1969 ScheduledSUnits = Best.
SUs;
1970 ScheduledSUnitsInv.resize(
SUnits.size());
1972 for (
unsigned i = 0,
e = (
unsigned)
SUnits.size();
i !=
e; ++
i) {
1973 ScheduledSUnitsInv[ScheduledSUnits[
i]] =
i;
1983 for (
unsigned I : ScheduledSUnits) {
1997 dbgs() <<
"*** Final schedule for "