22 #define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
144 if (TryVal < CandVal) {
148 if (TryVal > CandVal) {
161 if (TryVal > CandVal) {
165 if (TryVal < CandVal) {
179 NodeNum2Index[SU->
NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
184 void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
191 void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
194 if (!Cand.isValid()) {
199 if (Cand.SGPRUsage > 60 &&
220 Cand.HasLowLatencyNonWaitedParent,
228 if (TryCand.IsLowLatency &&
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
243 SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
246 for (
SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason !=
NoCand)
261 TopCand.setBest(TryCand);
274 for (
SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
297 if (
MI->isDebugValue())
300 if (InstSlot >=
First && InstSlot <= Last)
318 for (
SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
324 RPTracker.closeRegion();
327 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
331 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
333 LiveInRegs.insert(RegMaskPair.RegUnit);
358 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
360 if (
Reg.isVirtual() &&
364 LiveOutRegs.insert(
Reg);
385 initRegPressure(BeginBlock, EndBlock);
392 for (
SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
406 InternalAdditionnalPressure.resize(TopPressure.
MaxSetPressure.size());
410 assert(SUnits.size() == ScheduledSUnits.size() &&
411 TopReadySUs.empty());
412 for (
SUnit* SU : SUnits) {
414 SU->NumPredsLeft == 0);
421 void SIScheduleBlock::undoSchedule() {
422 for (
SUnit* SU : SUnits) {
423 SU->isScheduled =
false;
424 for (
SDep& Succ : SU->Succs) {
426 undoReleaseSucc(SU, &Succ);
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
434 void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
444 void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
453 dbgs() <<
"*** Scheduling failed! ***\n";
455 dbgs() <<
" has been released too many times!\n";
464 void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
474 releaseSucc(SU, &Succ);
476 TopReadySUs.push_back(SuccSU);
480 void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
483 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
484 if (
I == TopReadySUs.end()) {
485 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
488 TopReadySUs.erase(
I);
490 releaseSuccessors(SU,
true);
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
498 std::map<unsigned, unsigned>::iterator
I =
500 if (
I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[
I->second] = 1;
509 for (
SUnit* SU : SUnits) {
510 releaseSuccessors(SU,
false);
512 HighLatencyBlock =
true;
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
519 unsigned PredID = Pred->
getID();
523 if (PredID ==
P->getID())
526 Preds.push_back(Pred);
531 return PredID ==
S.first->getID();
533 "Loop in the Block Graph!");
538 unsigned SuccID = Succ->
getID();
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &
S : Succs) {
542 if (SuccID ==
S.first->getID()) {
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::make_pair(Succ,
Kind));
555 "Loop in the Block Graph!");
560 dbgs() <<
"Block (" << ID <<
")\n";
564 dbgs() <<
"\nContains High Latency Instruction: "
565 << HighLatencyBlock <<
'\n';
566 dbgs() <<
"\nDepends On:\n";
568 P->printDebug(
false);
571 dbgs() <<
"\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind>
S : Succs) {
574 dbgs() <<
"(Data Dep) ";
575 S.first->printDebug(
false);
579 dbgs() <<
"LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
582 dbgs() <<
"LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
585 dbgs() <<
"LiveIns:\n";
586 for (
unsigned Reg : LiveInRegs)
589 dbgs() <<
"\nLiveOuts:\n";
590 for (
unsigned Reg : LiveOutRegs)
594 dbgs() <<
"\nInstructions:\n";
595 for (
const SUnit* SU : SUnits)
598 dbgs() <<
"///////////////////////\n";
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
610 Blocks.find(BlockVariant);
611 if (
B == Blocks.end()) {
613 createBlocksForVariant(BlockVariant);
615 scheduleInsideBlocks();
617 Res.
Blocks = CurrentBlocks;
620 Blocks[BlockVariant] = Res;
630 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
633 void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->
SUnits.size();
636 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
639 CurrentColoring[SU->
NodeNum] = NextReservedID++;
646 for (
const auto &PredDep : SU.
Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
654 void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->
SUnits.size();
656 unsigned NumHighLatencies = 0;
658 int Color = NextReservedID;
660 std::set<unsigned> FormingGroup;
662 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
668 if (NumHighLatencies == 0)
671 if (NumHighLatencies <= 6)
673 else if (NumHighLatencies <= 12)
681 unsigned CompatibleGroup =
true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
695 for (
unsigned j : FormingGroup) {
697 std::vector<int> SubGraph;
710 else if (SubGraph.size() > 5) {
712 CompatibleGroup =
false;
717 for (
unsigned k : SubGraph) {
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup =
false;
731 CompatibleGroup =
false;
735 if (!CompatibleGroup)
739 CompatibleGroup =
false;
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.
NodeNum);
752 for (
unsigned j : AdditionalElements)
753 CurrentColoring[
j] = ProposedColor;
754 CurrentColoring[SU.
NodeNum] = ProposedColor;
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.
NodeNum);
765 CurrentColoring[SU.
NodeNum] = ProposedColor;
767 }
else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
777 void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->
SUnits.size();
779 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
792 std::set<unsigned> SUColors;
795 if (CurrentColoring[SU->
NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
805 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
809 if (SUColors.empty())
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
816 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
821 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
823 ColorCombinations[SUColors] = NextNonReservedID++;
828 ColorCombinations.clear();
834 std::set<unsigned> SUColors;
837 if (CurrentColoring[SU->
NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
847 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
851 if (SUColors.empty())
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
858 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
863 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
865 ColorCombinations[SUColors] = NextNonReservedID++;
871 void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 unsigned DAGSize = DAG->
SUnits.size();
873 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
878 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
880 std::pair<unsigned, unsigned> SUColors;
883 if (CurrentColoring[SU->
NodeNum])
886 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->
NodeNum];
887 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->
NodeNum];
889 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
890 ColorCombinations.find(SUColors);
891 if (Pos != ColorCombinations.end()) {
892 CurrentColoring[SU->
NodeNum] = Pos->second;
894 CurrentColoring[SU->
NodeNum] = NextNonReservedID;
895 ColorCombinations[SUColors] = NextNonReservedID++;
900 void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
901 unsigned DAGSize = DAG->
SUnits.size();
902 std::vector<int> PendingColoring = CurrentColoring;
905 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
906 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
909 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
910 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
911 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
912 CurrentTopDownReservedDependencyColoring.end()) == 0)
917 std::set<unsigned> SUColors;
918 std::set<unsigned> SUColorsPending;
920 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
923 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
924 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
931 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
932 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
933 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
934 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
939 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
940 PendingColoring[SU->
NodeNum] = *SUColors.begin();
943 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
945 CurrentColoring = PendingColoring;
949 void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
950 unsigned DAGSize = DAG->
SUnits.size();
951 unsigned PreviousColor;
952 std::set<unsigned> SeenColors;
957 PreviousColor = CurrentColoring[0];
959 for (
unsigned i = 1,
e = DAGSize;
i !=
e; ++
i) {
961 unsigned CurrentColor = CurrentColoring[
i];
962 unsigned PreviousColorSave = PreviousColor;
965 if (CurrentColor != PreviousColor)
966 SeenColors.insert(PreviousColor);
967 PreviousColor = CurrentColor;
969 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
972 if (SeenColors.find(CurrentColor) == SeenColors.end())
975 if (PreviousColorSave != CurrentColor)
976 CurrentColoring[
i] = NextNonReservedID++;
978 CurrentColoring[
i] = CurrentColoring[
i-1];
982 void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
983 unsigned DAGSize = DAG->
SUnits.size();
987 std::set<unsigned> SUColors;
989 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1001 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1003 if (SUColors.size() == 1)
1004 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1008 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1009 unsigned DAGSize = DAG->
SUnits.size();
1013 std::set<unsigned> SUColors;
1015 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1022 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1024 if (SUColors.size() == 1)
1025 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1029 void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1030 unsigned DAGSize = DAG->
SUnits.size();
1034 std::set<unsigned> SUColors;
1036 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1043 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1045 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1046 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1050 void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1051 unsigned DAGSize = DAG->
SUnits.size();
1052 std::map<unsigned, unsigned> ColorCount;
1056 unsigned color = CurrentColoring[SU->
NodeNum];
1057 ++ColorCount[color];
1062 unsigned color = CurrentColoring[SU->
NodeNum];
1063 std::set<unsigned> SUColors;
1065 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1068 if (ColorCount[color] > 1)
1075 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1077 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1078 --ColorCount[color];
1079 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1080 ++ColorCount[*SUColors.begin()];
1085 void SIScheduleBlockCreator::cutHugeBlocks() {
1089 void SIScheduleBlockCreator::regroupNoUserInstructions() {
1090 unsigned DAGSize = DAG->
SUnits.size();
1091 int GroupID = NextNonReservedID++;
1095 bool hasSuccessor =
false;
1097 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1104 hasSuccessor =
true;
1107 CurrentColoring[SU->
NodeNum] = GroupID;
1111 void SIScheduleBlockCreator::colorExports() {
1112 unsigned ExportColor = NextNonReservedID++;
1132 for (
unsigned j : ExpGroup) {
1134 std::vector<int> SubGraph;
1150 for (
unsigned k : SubGraph) {
1158 ExpGroup.push_back(SUNum);
1163 for (
unsigned j : ExpGroup)
1164 CurrentColoring[
j] = ExportColor;
1168 unsigned DAGSize = DAG->
SUnits.size();
1169 std::map<unsigned,unsigned> RealID;
1171 CurrentBlocks.clear();
1172 CurrentColoring.clear();
1173 CurrentColoring.resize(DAGSize, 0);
1174 Node2CurrentBlock.clear();
1180 NextNonReservedID = DAGSize + 1;
1185 colorHighLatenciesGroups();
1187 colorHighLatenciesAlone();
1188 colorComputeReservedDependencies();
1189 colorAccordingToReservedDependencies();
1190 colorEndsAccordingToDependencies();
1192 colorForceConsecutiveOrderInGroup();
1193 regroupNoUserInstructions();
1194 colorMergeConstantLoadsNextGroup();
1195 colorMergeIfPossibleNextGroupOnlyForReserved();
1199 Node2CurrentBlock.resize(DAGSize, -1);
1200 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1202 unsigned Color = CurrentColoring[SU->
NodeNum];
1203 if (RealID.find(Color) == RealID.end()) {
1204 int ID = CurrentBlocks.size();
1205 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1206 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1209 CurrentBlocks[RealID[Color]]->addUnit(SU);
1210 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1214 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1216 int SUID = Node2CurrentBlock[
i];
1221 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1222 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1229 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1230 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1235 for (
unsigned i = 0,
e = CurrentBlocks.size();
i !=
e; ++
i) {
1237 Block->finalizeUnits();
1240 for (
unsigned i = 0,
e = CurrentBlocks.size();
i !=
e; ++
i) {
1241 SIScheduleBlock *Block = CurrentBlocks[i];
1242 Block->printDebug(true);
1252 for (;
I != End; ++
I) {
1253 if (!
I->isDebugInstr())
1259 void SIScheduleBlockCreator::topologicalSort() {
1260 unsigned DAGSize = CurrentBlocks.size();
1261 std::vector<int> WorkList;
1265 WorkList.reserve(DAGSize);
1266 TopDownIndex2Block.resize(DAGSize);
1267 TopDownBlock2Index.resize(DAGSize);
1268 BottomUpIndex2Block.resize(DAGSize);
1270 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1272 unsigned Degree =
Block->getSuccs().size();
1273 TopDownBlock2Index[
i] = Degree;
1275 WorkList.push_back(
i);
1280 while (!WorkList.empty()) {
1281 int i = WorkList.back();
1283 WorkList.pop_back();
1284 TopDownBlock2Index[
i] = --
Id;
1285 TopDownIndex2Block[
Id] =
i;
1287 if (!--TopDownBlock2Index[Pred->getID()])
1288 WorkList.push_back(Pred->getID());
1294 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1297 assert(TopDownBlock2Index[
i] > TopDownBlock2Index[Pred->getID()] &&
1298 "Wrong Top Down topological sorting");
1303 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1304 TopDownIndex2Block.rend());
1307 void SIScheduleBlockCreator::scheduleInsideBlocks() {
1308 unsigned DAGSize = CurrentBlocks.size();
1314 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1315 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1317 Block->fastSchedule();
1325 std::vector<MachineBasicBlock::iterator> PosOld;
1326 std::vector<MachineBasicBlock::iterator> PosNew;
1327 PosOld.reserve(DAG->
SUnits.size());
1328 PosNew.reserve(DAG->
SUnits.size());
1330 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1331 int BlockIndice = TopDownIndex2Block[
i];
1333 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1335 for (
SUnit* SU : SUs) {
1338 PosOld.push_back(Pos);
1339 if (&*CurrentTopFastSched ==
MI) {
1340 PosNew.push_back(Pos);
1341 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1353 PosNew.push_back(CurrentTopFastSched);
1362 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1364 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1365 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1370 for (
unsigned i = PosOld.size(),
e = 0;
i !=
e; --
i) {
1382 LLVM_DEBUG(
for (
unsigned i = 0,
e = CurrentBlocks.size();
i !=
e; ++
i) {
1383 SIScheduleBlock *Block = CurrentBlocks[i];
1384 Block->printDebug(true);
1388 void SIScheduleBlockCreator::fillStats() {
1389 unsigned DAGSize = CurrentBlocks.size();
1391 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1392 int BlockIndice = TopDownIndex2Block[
i];
1394 if (
Block->getPreds().empty())
1399 if (Depth < Pred->
Depth + Pred->getCost())
1400 Depth = Pred->Depth + Pred->getCost();
1406 for (
unsigned i = 0,
e = DAGSize;
i !=
e; ++
i) {
1407 int BlockIndice = BottomUpIndex2Block[
i];
1409 if (
Block->getSuccs().empty())
1412 unsigned Height = 0;
1413 for (
const auto &Succ :
Block->getSuccs())
1414 Height =
std::max(Height, Succ.first->Height + Succ.first->getCost());
1415 Block->Height = Height;
1425 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1426 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1427 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1439 LiveOutRegsNumUsages.resize(Blocks.size());
1440 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1442 for (
unsigned Reg : Block->getInRegs()) {
1446 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1447 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1449 if (RegPos != PredOutRegs.end()) {
1461 ++LiveOutRegsNumUsages[PredID][
Reg];
1465 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1466 BlockNumPredsLeft.resize(Blocks.size());
1467 BlockNumSuccsLeft.resize(Blocks.size());
1469 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1471 BlockNumPredsLeft[
i] = Block->getPreds().size();
1472 BlockNumSuccsLeft[
i] = Block->getSuccs().size();
1476 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1482 std::set<unsigned> InRegs = DAG->
getInRegs();
1483 addLiveRegs(InRegs);
1489 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1493 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1495 if (OutRegs.find(
Reg) == OutRegs.end())
1498 ++LiveOutRegsNumUsages[
ID][
Reg];
1505 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1507 for (
unsigned Reg : Block->getInRegs()) {
1510 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1511 std::set<unsigned>::iterator RegPos = PredOutRegs.find(
Reg);
1513 if (RegPos != PredOutRegs.end()) {
1520 ++LiveRegsConsumers[
Reg];
1524 for (
unsigned i = 0,
e = Blocks.size();
i !=
e; ++
i) {
1526 if (BlockNumPredsLeft[
i] == 0) {
1527 ReadyBlocks.push_back(Block);
1532 BlocksScheduled.push_back(Block);
1533 blockScheduled(Block);
1537 : BlocksScheduled) {
1538 dbgs() <<
' ' << Block->getID();
1542 bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1543 SIBlockSchedCandidate &TryCand) {
1544 if (!Cand.isValid()) {
1551 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1558 TryCand, Cand,
Depth))
1561 Cand.NumHighLatencySuccessors,
1567 bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1568 SIBlockSchedCandidate &TryCand) {
1569 if (!Cand.isValid()) {
1578 Cand.NumSuccessors > 0,
1590 SIBlockSchedCandidate Cand;
1591 std::vector<SIScheduleBlock*>::iterator Best;
1593 if (ReadyBlocks.empty())
1597 VregCurrentUsage, SregCurrentUsage);
1598 if (VregCurrentUsage > maxVregUsage)
1599 maxVregUsage = VregCurrentUsage;
1600 if (SregCurrentUsage > maxSregUsage)
1601 maxSregUsage = SregCurrentUsage;
1604 : ReadyBlocks)
dbgs()
1605 <<
Block->getID() <<
' ';
1606 dbgs() <<
"\nCurrent Live:\n";
1611 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1612 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1614 Cand.Block =
nullptr;
1615 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1616 E = ReadyBlocks.end();
I !=
E; ++
I) {
1617 SIBlockSchedCandidate TryCand;
1619 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1620 TryCand.VGPRUsageDiff =
1621 checkRegUsageImpact(TryCand.Block->getInRegs(),
1622 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1623 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1624 TryCand.NumHighLatencySuccessors =
1625 TryCand.Block->getNumHighLatencySuccessors();
1626 TryCand.LastPosHighLatParentScheduled =
1627 (
unsigned int) std::max<int> (0,
1628 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1629 LastPosWaitedHighLatency);
1630 TryCand.Height = TryCand.Block->Height;
1632 if (VregCurrentUsage > 120 ||
1634 if (!tryCandidateRegUsage(Cand, TryCand) &&
1636 tryCandidateLatency(Cand, TryCand);
1638 if (!tryCandidateLatency(Cand, TryCand))
1639 tryCandidateRegUsage(Cand, TryCand);
1641 if (TryCand.Reason !=
NoCand) {
1642 Cand.setBest(TryCand);
1644 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1650 dbgs() <<
"Is a block with high latency instruction: "
1651 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1652 dbgs() <<
"Position of last high latency dependency: "
1653 << Cand.LastPosHighLatParentScheduled <<
'\n';
1654 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1658 ReadyBlocks.erase(Best);
1664 void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1667 if (!
Reg.isVirtual())
1670 (void) LiveRegs.insert(
Reg);
1674 void SIScheduleBlockScheduler::decreaseLiveRegs(
SIScheduleBlock *Block,
1675 std::set<unsigned> &Regs) {
1676 for (
unsigned Reg : Regs) {
1678 std::set<unsigned>::iterator Pos = LiveRegs.find(
Reg);
1679 assert (Pos != LiveRegs.end() &&
1680 LiveRegsConsumers.find(
Reg) != LiveRegsConsumers.end() &&
1681 LiveRegsConsumers[
Reg] >= 1);
1682 --LiveRegsConsumers[
Reg];
1683 if (LiveRegsConsumers[
Reg] == 0)
1684 LiveRegs.erase(Pos);
1688 void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1689 for (
const auto &Block : Parent->
getSuccs()) {
1690 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1691 ReadyBlocks.push_back(
Block.first);
1695 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1699 void SIScheduleBlockScheduler::blockScheduled(
SIScheduleBlock *Block) {
1700 decreaseLiveRegs(Block,
Block->getInRegs());
1701 addLiveRegs(
Block->getOutRegs());
1702 releaseBlockSuccs(Block);
1703 for (std::map<unsigned, unsigned>::iterator RegI =
1704 LiveOutRegsNumUsages[
Block->getID()].begin(),
1705 E = LiveOutRegsNumUsages[
Block->getID()].end(); RegI !=
E; ++RegI) {
1706 std::pair<unsigned, unsigned> RegP = *RegI;
1708 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1709 LiveRegsConsumers[RegP.first] == 0);
1710 LiveRegsConsumers[RegP.first] += RegP.second;
1712 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1713 (
unsigned)LastPosWaitedHighLatency)
1714 LastPosWaitedHighLatency =
1715 LastPosHighLatencyParentScheduled[
Block->getID()];
1716 ++NumBlockScheduled;
1720 SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1721 std::set<unsigned> &OutRegs) {
1722 std::vector<int> DiffSetPressure;
1727 if (!
Reg.isVirtual())
1729 if (LiveRegsConsumers[
Reg] > 1)
1732 for (; PSetI.
isValid(); ++PSetI) {
1733 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1739 if (!
Reg.isVirtual())
1742 for (; PSetI.
isValid(); ++PSetI) {
1743 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1747 return DiffSetPressure;
1753 SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1754 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1757 std::vector<SIScheduleBlock*> ScheduledBlocks;
1760 ScheduledBlocks =
Scheduler.getBlocks();
1762 for (
unsigned b = 0;
b < ScheduledBlocks.size(); ++
b) {
1764 std::vector<SUnit*>
SUs = Block->getScheduledUnits();
1788 void SIScheduleDAGMI::topologicalSort() {
1801 void SIScheduleDAGMI::moveLowLatencies() {
1802 unsigned DAGSize =
SUnits.size();
1803 int LastLowLatencyUser = -1;
1804 int LastLowLatencyPos = -1;
1806 for (
unsigned i = 0,
e = ScheduledSUnits.size();
i !=
e; ++
i) {
1808 bool IsLowLatencyUser =
false;
1809 unsigned MinPos = 0;
1814 IsLowLatencyUser =
true;
1818 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1819 if (PredPos >= MinPos)
1820 MinPos = PredPos + 1;
1824 unsigned BestPos = LastLowLatencyUser + 1;
1825 if ((
int)BestPos <= LastLowLatencyPos)
1826 BestPos = LastLowLatencyPos + 1;
1827 if (BestPos < MinPos)
1830 for (
unsigned u =
i; u > BestPos; --u) {
1831 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1832 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1834 ScheduledSUnits[BestPos] = SU->
NodeNum;
1835 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1837 LastLowLatencyPos = BestPos;
1838 if (IsLowLatencyUser)
1839 LastLowLatencyUser = BestPos;
1840 }
else if (IsLowLatencyUser) {
1841 LastLowLatencyUser =
i;
1845 bool CopyForLowLat =
false;
1851 CopyForLowLat =
true;
1857 for (
unsigned u =
i; u > MinPos; --u) {
1858 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1859 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1861 ScheduledSUnits[MinPos] = SU->
NodeNum;
1862 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1869 for (
unsigned i = 0,
e =
SUnits.size();
i !=
e; ++
i) {
1870 SUnits[
i].isScheduled =
false;
1871 SUnits[
i].WeakPredsLeft = SUnitsLinksBackup[
i].WeakPredsLeft;
1872 SUnits[
i].NumPredsLeft = SUnitsLinksBackup[
i].NumPredsLeft;
1873 SUnits[
i].WeakSuccsLeft = SUnitsLinksBackup[
i].WeakSuccsLeft;
1874 SUnits[
i].NumSuccsLeft = SUnitsLinksBackup[
i].NumSuccsLeft;
1879 template<
typename _Iterator>
void
1881 unsigned &VgprUsage,
unsigned &SgprUsage) {
1884 for (_Iterator RegI =
First; RegI != End; ++RegI) {
1887 if (!
Reg.isVirtual())
1890 for (; PSetI.
isValid(); ++PSetI) {
1891 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1893 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1919 SUnitsLinksBackup =
SUnits;
1928 for (
unsigned i = 0,
e = (
unsigned)
SUnits.size();
i !=
e; ++
i) {
1934 bool OffsetIsScalable;
1935 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1936 OffsetIsScalable,
TRI))
1961 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1962 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1982 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1983 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1989 ScheduledSUnits = Best.
SUs;
1990 ScheduledSUnitsInv.resize(
SUnits.size());
1992 for (
unsigned i = 0,
e = (
unsigned)
SUnits.size();
i !=
e; ++
i) {
1993 ScheduledSUnitsInv[ScheduledSUnits[
i]] =
i;
2003 for (std::vector<unsigned>::iterator
I = ScheduledSUnits.begin(),
2004 E = ScheduledSUnits.end();
I !=
E; ++
I) {
2018 dbgs() <<
"*** Final schedule for "