22#define DEBUG_TYPE "machine-scheduler"
126 case NoCand:
return "NOCAND";
128 case Latency:
return "LATENCY";
130 case Depth:
return "DEPTH";
143 if (TryVal < CandVal) {
147 if (TryVal > CandVal) {
160 if (TryVal > CandVal) {
164 if (TryVal < CandVal) {
177 NodeNum2Index[SU->
NodeNum] = SUnits.size();
178 SUnits.push_back(SU);
182void SIScheduleBlock::traceCandidate(
const SISchedCandidate &Cand) {
189void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
190 SISchedCandidate &TryCand) {
192 if (!Cand.isValid()) {
197 if (Cand.SGPRUsage > 60 &&
218 Cand.HasLowLatencyNonWaitedParent,
226 if (TryCand.IsLowLatency &&
236 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
241SUnit* SIScheduleBlock::pickNode() {
242 SISchedCandidate TopCand;
244 for (
SUnit* SU : TopReadySUs) {
245 SISchedCandidate TryCand;
246 std::vector<unsigned> pressure;
247 std::vector<unsigned> MaxPressure;
251 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
252 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.HasLowLatencyNonWaitedParent =
256 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
257 tryCandidateTopDown(TopCand, TryCand);
258 if (TryCand.Reason !=
NoCand)
259 TopCand.setBest(TryCand);
272 for (
SUnit* SU : SUnits) {
273 if (!SU->NumPredsLeft)
274 TopReadySUs.push_back(SU);
277 while (!TopReadySUs.empty()) {
278 SUnit *SU = TopReadySUs[0];
279 ScheduledSUnits.push_back(SU);
292 UI =
MRI->def_instr_begin(Reg),
293 UE =
MRI->def_instr_end(); UI != UE; ++UI) {
295 if (
MI->isDebugValue())
298 if (InstSlot >=
First && InstSlot <=
Last)
316 for (
SUnit* SU : ScheduledSUnits) {
317 RPTracker.setPos(SU->getInstr());
322 RPTracker.closeRegion();
325 TopRPTracker.
addLiveRegs(RPTracker.getPressure().LiveInRegs);
326 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
330 if (RegMaskPair.RegUnit.isVirtual())
331 LiveInRegs.insert(RegMaskPair.RegUnit);
356 for (
const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
358 if (
Reg.isVirtual() &&
362 LiveOutRegs.insert(Reg);
383 initRegPressure(BeginBlock, EndBlock);
390 for (
SUnit* SU : SUnits) {
391 if (!SU->NumPredsLeft)
392 TopReadySUs.push_back(SU);
395 while (!TopReadySUs.empty()) {
396 SUnit *SU = pickNode();
397 ScheduledSUnits.push_back(SU);
404 InternalAdditionalPressure.resize(TopPressure.
MaxSetPressure.size());
408 assert(SUnits.size() == ScheduledSUnits.size() &&
409 TopReadySUs.empty());
410 for (
SUnit* SU : SUnits) {
412 SU->NumPredsLeft == 0);
419void SIScheduleBlock::undoSchedule() {
420 for (
SUnit* SU : SUnits) {
421 SU->isScheduled =
false;
422 for (
SDep& Succ : SU->Succs) {
424 undoReleaseSucc(SU, &Succ);
427 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
428 ScheduledSUnits.clear();
432void SIScheduleBlock::undoReleaseSucc(
SUnit *SU,
SDep *SuccEdge) {
442void SIScheduleBlock::releaseSucc(
SUnit *SU,
SDep *SuccEdge) {
451 dbgs() <<
"*** Scheduling failed! ***\n";
453 dbgs() <<
" has been released too many times!\n";
462void SIScheduleBlock::releaseSuccessors(
SUnit *SU,
bool InOrOutBlock) {
472 releaseSucc(SU, &Succ);
474 TopReadySUs.push_back(SuccSU);
478void SIScheduleBlock::nodeScheduled(
SUnit *SU) {
481 std::vector<SUnit *>::iterator
I =
llvm::find(TopReadySUs, SU);
482 if (
I == TopReadySUs.end()) {
483 dbgs() <<
"Data Structure Bug in SI Scheduler\n";
486 TopReadySUs.erase(
I);
488 releaseSuccessors(SU,
true);
491 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->
NodeNum]])
492 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
496 std::map<unsigned, unsigned>::iterator
I =
498 if (
I != NodeNum2Index.end())
499 HasLowLatencyNonWaitedParent[
I->second] = 1;
507 for (
SUnit* SU : SUnits) {
508 releaseSuccessors(SU,
false);
510 HighLatencyBlock =
true;
512 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
517 unsigned PredID = Pred->
getID();
521 if (PredID ==
P->getID())
524 Preds.push_back(Pred);
529 return PredID == S.first->getID();
531 "Loop in the Block Graph!");
536 unsigned SuccID = Succ->
getID();
539 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
540 if (SuccID == S.first->getID()) {
548 ++NumHighLatencySuccessors;
549 Succs.emplace_back(Succ, Kind);
553 "Loop in the Block Graph!");
558 dbgs() <<
"Block (" <<
ID <<
")\n";
562 dbgs() <<
"\nContains High Latency Instruction: "
563 << HighLatencyBlock <<
'\n';
564 dbgs() <<
"\nDepends On:\n";
566 P->printDebug(
false);
569 dbgs() <<
"\nSuccessors:\n";
570 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
572 dbgs() <<
"(Data Dep) ";
573 S.first->printDebug(
false);
577 dbgs() <<
"LiveInPressure "
578 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
579 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
'\n';
580 dbgs() <<
"LiveOutPressure "
581 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
' '
582 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n\n";
583 dbgs() <<
"LiveIns:\n";
584 for (
unsigned Reg : LiveInRegs)
587 dbgs() <<
"\nLiveOuts:\n";
588 for (
unsigned Reg : LiveOutRegs)
592 dbgs() <<
"\nInstructions:\n";
593 for (
const SUnit* SU : SUnits)
596 dbgs() <<
"///////////////////////\n";
607 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator
B =
608 Blocks.find(BlockVariant);
609 if (
B == Blocks.end()) {
611 createBlocksForVariant(BlockVariant);
613 scheduleInsideBlocks();
615 Res.
Blocks = CurrentBlocks;
618 Blocks[BlockVariant] = Res;
627 return CurrentBlocks[Node2CurrentBlock[SU->
NodeNum]]->getID() ==
ID;
630void SIScheduleBlockCreator::colorHighLatenciesAlone() {
631 unsigned DAGSize = DAG->
SUnits.size();
633 for (
unsigned i = 0, e = DAGSize; i != e; ++i) {
636 CurrentColoring[SU->
NodeNum] = NextReservedID++;
643 for (
const auto &PredDep : SU.
Preds) {
644 if (PredDep.getSUnit() == &FromSU &&
651void SIScheduleBlockCreator::colorHighLatenciesGroups() {
652 unsigned DAGSize = DAG->
SUnits.size();
653 unsigned NumHighLatencies = 0;
655 int Color = NextReservedID;
657 std::set<unsigned> FormingGroup;
659 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
665 if (NumHighLatencies == 0)
668 if (NumHighLatencies <= 6)
670 else if (NumHighLatencies <= 12)
678 unsigned CompatibleGroup =
true;
679 int ProposedColor = Color;
680 std::vector<int> AdditionalElements;
692 for (
unsigned j : FormingGroup) {
694 std::vector<int> SubGraph;
707 if (SubGraph.size() > 5) {
709 CompatibleGroup =
false;
713 for (
unsigned k : SubGraph) {
719 CurrentColoring[k] != 0)) {
720 CompatibleGroup =
false;
726 CompatibleGroup =
false;
730 if (!CompatibleGroup)
734 CompatibleGroup =
false;
744 if (CompatibleGroup) {
745 FormingGroup.insert(SU.
NodeNum);
746 for (
unsigned j : AdditionalElements)
747 CurrentColoring[
j] = ProposedColor;
748 CurrentColoring[SU.
NodeNum] = ProposedColor;
754 if (!CompatibleGroup) {
755 FormingGroup.clear();
756 Color = ++NextReservedID;
757 ProposedColor = Color;
758 FormingGroup.insert(SU.
NodeNum);
759 CurrentColoring[SU.
NodeNum] = ProposedColor;
761 }
else if (Count == GroupSize) {
762 FormingGroup.clear();
763 Color = ++NextReservedID;
764 ProposedColor = Color;
771void SIScheduleBlockCreator::colorComputeReservedDependencies() {
772 unsigned DAGSize = DAG->
SUnits.size();
773 std::map<std::set<unsigned>,
unsigned> ColorCombinations;
775 CurrentTopDownReservedDependencyColoring.clear();
776 CurrentBottomUpReservedDependencyColoring.clear();
778 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
779 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
786 std::set<unsigned> SUColors;
789 if (CurrentColoring[SU->
NodeNum]) {
790 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
799 if (CurrentTopDownReservedDependencyColoring[Pred->
NodeNum] > 0)
800 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->
NodeNum]);
803 if (SUColors.empty())
806 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
807 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
810 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
811 ColorCombinations.find(SUColors);
812 if (Pos != ColorCombinations.end()) {
813 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] = Pos->second;
815 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] =
817 ColorCombinations[SUColors] = NextNonReservedID++;
822 ColorCombinations.clear();
828 std::set<unsigned> SUColors;
831 if (CurrentColoring[SU->
NodeNum]) {
832 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
841 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0)
842 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum]);
845 if (SUColors.empty())
848 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
849 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
852 std::map<std::set<unsigned>,
unsigned>::iterator Pos =
853 ColorCombinations.find(SUColors);
854 if (Pos != ColorCombinations.end()) {
855 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] = Pos->second;
857 CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] =
859 ColorCombinations[SUColors] = NextNonReservedID++;
865void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
866 std::map<std::pair<unsigned, unsigned>,
unsigned> ColorCombinations;
872 std::pair<unsigned, unsigned> SUColors;
875 if (CurrentColoring[SU.
NodeNum])
878 SUColors.first = CurrentTopDownReservedDependencyColoring[SU.
NodeNum];
879 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU.
NodeNum];
881 std::map<std::pair<unsigned, unsigned>,
unsigned>::iterator Pos =
882 ColorCombinations.find(SUColors);
883 if (Pos != ColorCombinations.end()) {
884 CurrentColoring[SU.
NodeNum] = Pos->second;
886 CurrentColoring[SU.
NodeNum] = NextNonReservedID;
887 ColorCombinations[SUColors] = NextNonReservedID++;
892void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
893 unsigned DAGSize = DAG->
SUnits.size();
894 std::vector<int> PendingColoring = CurrentColoring;
897 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&
898 CurrentTopDownReservedDependencyColoring.size() == DAGSize);
907 std::set<unsigned> SUColors;
908 std::set<unsigned> SUColorsPending;
910 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
913 if (CurrentBottomUpReservedDependencyColoring[SU->
NodeNum] > 0 ||
914 CurrentTopDownReservedDependencyColoring[SU->
NodeNum] > 0)
921 if (CurrentBottomUpReservedDependencyColoring[Succ->
NodeNum] > 0 ||
922 CurrentTopDownReservedDependencyColoring[Succ->
NodeNum] > 0)
923 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
924 SUColorsPending.insert(PendingColoring[Succ->
NodeNum]);
929 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
930 PendingColoring[SU->
NodeNum] = *SUColors.begin();
933 PendingColoring[SU->
NodeNum] = NextNonReservedID++;
935 CurrentColoring = PendingColoring;
939void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
940 unsigned DAGSize = DAG->
SUnits.size();
941 unsigned PreviousColor;
942 std::set<unsigned> SeenColors;
947 PreviousColor = CurrentColoring[0];
949 for (
unsigned i = 1, e = DAGSize; i !=
e; ++i) {
951 unsigned CurrentColor = CurrentColoring[i];
952 unsigned PreviousColorSave = PreviousColor;
955 if (CurrentColor != PreviousColor)
956 SeenColors.insert(PreviousColor);
957 PreviousColor = CurrentColor;
959 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
962 if (SeenColors.find(CurrentColor) == SeenColors.end())
965 if (PreviousColorSave != CurrentColor)
966 CurrentColoring[i] = NextNonReservedID++;
968 CurrentColoring[i] = CurrentColoring[i-1];
972void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
973 unsigned DAGSize = DAG->
SUnits.size();
977 std::set<unsigned> SUColors;
979 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
991 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
993 if (SUColors.size() == 1)
994 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
998void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
999 unsigned DAGSize = DAG->
SUnits.size();
1003 std::set<unsigned> SUColors;
1005 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1012 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1014 if (SUColors.size() == 1)
1015 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1019void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1020 unsigned DAGSize = DAG->
SUnits.size();
1024 std::set<unsigned> SUColors;
1026 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1033 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1035 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1036 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1040void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1041 unsigned DAGSize = DAG->
SUnits.size();
1042 std::map<unsigned, unsigned> ColorCount;
1046 unsigned color = CurrentColoring[SU->
NodeNum];
1047 ++ColorCount[color];
1052 unsigned color = CurrentColoring[SU->
NodeNum];
1053 std::set<unsigned> SUColors;
1055 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1058 if (ColorCount[color] > 1)
1065 SUColors.insert(CurrentColoring[Succ->
NodeNum]);
1067 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1068 --ColorCount[color];
1069 CurrentColoring[SU->
NodeNum] = *SUColors.begin();
1070 ++ColorCount[*SUColors.begin()];
1075void SIScheduleBlockCreator::cutHugeBlocks() {
1079void SIScheduleBlockCreator::regroupNoUserInstructions() {
1080 unsigned DAGSize = DAG->
SUnits.size();
1081 int GroupID = NextNonReservedID++;
1085 bool hasSuccessor =
false;
1087 if (CurrentColoring[SU->
NodeNum] <= (
int)DAGSize)
1094 hasSuccessor =
true;
1097 CurrentColoring[SU->
NodeNum] = GroupID;
1101void SIScheduleBlockCreator::colorExports() {
1102 unsigned ExportColor = NextNonReservedID++;
1127 "SUnit unexpectedly not representing an instruction!");
1143 for (
unsigned j : ExpGroup)
1144 CurrentColoring[
j] = ExportColor;
1148 unsigned DAGSize = DAG->
SUnits.size();
1149 std::map<unsigned,unsigned> RealID;
1151 CurrentBlocks.clear();
1152 CurrentColoring.clear();
1153 CurrentColoring.resize(DAGSize, 0);
1154 Node2CurrentBlock.clear();
1160 NextNonReservedID = DAGSize + 1;
1165 colorHighLatenciesGroups();
1167 colorHighLatenciesAlone();
1168 colorComputeReservedDependencies();
1169 colorAccordingToReservedDependencies();
1170 colorEndsAccordingToDependencies();
1172 colorForceConsecutiveOrderInGroup();
1173 regroupNoUserInstructions();
1174 colorMergeConstantLoadsNextGroup();
1175 colorMergeIfPossibleNextGroupOnlyForReserved();
1179 Node2CurrentBlock.resize(DAGSize, -1);
1180 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1182 unsigned Color = CurrentColoring[SU->
NodeNum];
1183 if (RealID.find(Color) == RealID.end()) {
1184 int ID = CurrentBlocks.size();
1185 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG,
this,
ID));
1186 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1189 CurrentBlocks[RealID[Color]]->addUnit(SU);
1190 Node2CurrentBlock[SU->
NodeNum] = RealID[Color];
1194 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1196 int SUID = Node2CurrentBlock[i];
1201 if (Node2CurrentBlock[Succ->
NodeNum] != SUID)
1202 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->
NodeNum]],
1209 if (Node2CurrentBlock[Pred->
NodeNum] != SUID)
1210 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->
NodeNum]]);
1216 Block->finalizeUnits();
1218 dbgs() <<
"Blocks created:\n\n";
1220 Block->printDebug(
true);
1230 for (;
I !=
End; ++
I) {
1231 if (!
I->isDebugInstr())
1237void SIScheduleBlockCreator::topologicalSort() {
1238 unsigned DAGSize = CurrentBlocks.size();
1239 std::vector<int> WorkList;
1243 WorkList.reserve(DAGSize);
1244 TopDownIndex2Block.resize(DAGSize);
1245 TopDownBlock2Index.resize(DAGSize);
1246 BottomUpIndex2Block.resize(DAGSize);
1248 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1250 unsigned Degree =
Block->getSuccs().size();
1251 TopDownBlock2Index[i] = Degree;
1253 WorkList.push_back(i);
1258 while (!WorkList.empty()) {
1259 int i = WorkList.back();
1261 WorkList.pop_back();
1262 TopDownBlock2Index[i] = --
Id;
1263 TopDownIndex2Block[
Id] = i;
1265 if (!--TopDownBlock2Index[Pred->getID()])
1266 WorkList.push_back(Pred->getID());
1272 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1275 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
1276 "Wrong Top Down topological sorting");
1281 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1282 TopDownIndex2Block.rend());
1285void SIScheduleBlockCreator::scheduleInsideBlocks() {
1286 unsigned DAGSize = CurrentBlocks.size();
1292 LLVM_DEBUG(
dbgs() <<
"First phase: Fast scheduling for Reg Liveness\n");
1293 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1295 Block->fastSchedule();
1303 std::vector<MachineBasicBlock::iterator> PosOld;
1304 std::vector<MachineBasicBlock::iterator> PosNew;
1305 PosOld.reserve(DAG->
SUnits.size());
1306 PosNew.reserve(DAG->
SUnits.size());
1308 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1309 int BlockIndice = TopDownIndex2Block[i];
1311 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1313 for (
SUnit* SU : SUs) {
1316 PosOld.push_back(Pos);
1317 if (&*CurrentTopFastSched ==
MI) {
1318 PosNew.push_back(Pos);
1319 CurrentTopFastSched =
nextIfDebug(++CurrentTopFastSched,
1331 PosNew.push_back(CurrentTopFastSched);
1340 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1342 std::vector<SUnit*> SUs =
Block->getScheduledUnits();
1343 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1348 for (
unsigned i = PosOld.size(), e = 0; i != e; --i) {
1362 Block->printDebug(
true);
1366void SIScheduleBlockCreator::fillStats() {
1367 unsigned DAGSize = CurrentBlocks.size();
1369 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1370 int BlockIndice = TopDownIndex2Block[i];
1372 if (
Block->getPreds().empty())
1377 if (Depth < Pred->
Depth + Pred->getCost())
1378 Depth = Pred->Depth + Pred->getCost();
1384 for (
unsigned i = 0, e = DAGSize; i !=
e; ++i) {
1385 int BlockIndice = BottomUpIndex2Block[i];
1387 if (
Block->getSuccs().empty())
1390 unsigned Height = 0;
1391 for (
const auto &Succ :
Block->getSuccs())
1392 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1393 Block->Height = Height;
1403 DAG(DAG), Variant(Variant),
Blocks(BlocksStruct.
Blocks),
1404 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1405 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1417 LiveOutRegsNumUsages.resize(Blocks.size());
1419 for (
unsigned Reg :
Block->getInRegs()) {
1423 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1424 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1426 if (RegPos != PredOutRegs.end()) {
1438 ++LiveOutRegsNumUsages[PredID][Reg];
1442 LastPosHighLatencyParentScheduled.resize(
Blocks.size(), 0);
1443 BlockNumPredsLeft.resize(
Blocks.size());
1444 BlockNumSuccsLeft.resize(
Blocks.size());
1446 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1448 BlockNumPredsLeft[i] =
Block->getPreds().size();
1449 BlockNumSuccsLeft[i] =
Block->getSuccs().size();
1453 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1459 std::set<unsigned> InRegs = DAG->
getInRegs();
1460 addLiveRegs(InRegs);
1466 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1470 const std::set<unsigned> &OutRegs =
Block->getOutRegs();
1472 if (OutRegs.find(Reg) == OutRegs.end())
1475 ++LiveOutRegsNumUsages[
ID][Reg];
1483 for (
unsigned Reg :
Block->getInRegs()) {
1486 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1487 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1489 if (RegPos != PredOutRegs.end()) {
1496 ++LiveRegsConsumers[Reg];
1500 for (
unsigned i = 0, e =
Blocks.size(); i != e; ++i) {
1502 if (BlockNumPredsLeft[i] == 0) {
1503 ReadyBlocks.push_back(
Block);
1508 BlocksScheduled.push_back(
Block);
1509 blockScheduled(
Block);
1513 : BlocksScheduled) {
1518bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1519 SIBlockSchedCandidate &TryCand) {
1520 if (!Cand.isValid()) {
1527 Cand.LastPosHighLatParentScheduled, TryCand, Cand,
Latency))
1534 TryCand, Cand,
Depth))
1537 Cand.NumHighLatencySuccessors,
1543bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1544 SIBlockSchedCandidate &TryCand) {
1545 if (!Cand.isValid()) {
1554 Cand.NumSuccessors > 0,
1566 SIBlockSchedCandidate Cand;
1567 std::vector<SIScheduleBlock*>::iterator Best;
1569 if (ReadyBlocks.empty())
1573 VregCurrentUsage, SregCurrentUsage);
1574 if (VregCurrentUsage > maxVregUsage)
1575 maxVregUsage = VregCurrentUsage;
1576 if (SregCurrentUsage > maxSregUsage)
1577 maxSregUsage = SregCurrentUsage;
1580 : ReadyBlocks)
dbgs()
1581 <<
Block->getID() <<
' ';
1582 dbgs() <<
"\nCurrent Live:\n";
1587 dbgs() <<
"Current VGPRs: " << VregCurrentUsage <<
'\n';
1588 dbgs() <<
"Current SGPRs: " << SregCurrentUsage <<
'\n';);
1590 Cand.Block =
nullptr;
1591 for (std::vector<SIScheduleBlock*>::iterator
I = ReadyBlocks.begin(),
1592 E = ReadyBlocks.end();
I != E; ++
I) {
1593 SIBlockSchedCandidate TryCand;
1595 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1596 TryCand.VGPRUsageDiff =
1597 checkRegUsageImpact(TryCand.Block->getInRegs(),
1598 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1599 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1600 TryCand.NumHighLatencySuccessors =
1601 TryCand.Block->getNumHighLatencySuccessors();
1602 TryCand.LastPosHighLatParentScheduled =
1603 (
unsigned int) std::max<int> (0,
1604 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1605 LastPosWaitedHighLatency);
1606 TryCand.Height = TryCand.Block->Height;
1608 if (VregCurrentUsage > 120 ||
1610 if (!tryCandidateRegUsage(Cand, TryCand) &&
1612 tryCandidateLatency(Cand, TryCand);
1614 if (!tryCandidateLatency(Cand, TryCand))
1615 tryCandidateRegUsage(Cand, TryCand);
1617 if (TryCand.Reason !=
NoCand) {
1618 Cand.setBest(TryCand);
1620 LLVM_DEBUG(
dbgs() <<
"Best Current Choice: " << Cand.Block->getID() <<
' '
1626 dbgs() <<
"Is a block with high latency instruction: "
1627 << (Cand.IsHighLatency ?
"yes\n" :
"no\n");
1628 dbgs() <<
"Position of last high latency dependency: "
1629 << Cand.LastPosHighLatParentScheduled <<
'\n';
1630 dbgs() <<
"VGPRUsageDiff: " << Cand.VGPRUsageDiff <<
'\n';
1634 ReadyBlocks.erase(Best);
1640void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1643 if (!
Reg.isVirtual())
1646 (void) LiveRegs.insert(Reg);
1651 std::set<unsigned> &Regs) {
1652 for (
unsigned Reg : Regs) {
1654 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1655 assert (Pos != LiveRegs.end() &&
1656 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
1657 LiveRegsConsumers[Reg] >= 1);
1658 --LiveRegsConsumers[
Reg];
1659 if (LiveRegsConsumers[Reg] == 0)
1660 LiveRegs.erase(Pos);
1664void SIScheduleBlockScheduler::releaseBlockSuccs(
SIScheduleBlock *Parent) {
1666 if (--BlockNumPredsLeft[
Block.first->getID()] == 0)
1667 ReadyBlocks.push_back(
Block.first);
1671 LastPosHighLatencyParentScheduled[
Block.first->getID()] = NumBlockScheduled;
1677 addLiveRegs(
Block->getOutRegs());
1678 releaseBlockSuccs(
Block);
1679 for (
const auto &RegP : LiveOutRegsNumUsages[
Block->getID()]) {
1681 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||
1682 LiveRegsConsumers[RegP.first] == 0);
1683 LiveRegsConsumers[RegP.first] += RegP.second;
1685 if (LastPosHighLatencyParentScheduled[
Block->getID()] >
1686 (
unsigned)LastPosWaitedHighLatency)
1687 LastPosWaitedHighLatency =
1688 LastPosHighLatencyParentScheduled[
Block->getID()];
1689 ++NumBlockScheduled;
1693SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1694 std::set<unsigned> &OutRegs) {
1695 std::vector<int> DiffSetPressure;
1700 if (!
Reg.isVirtual())
1702 if (LiveRegsConsumers[Reg] > 1)
1705 for (; PSetI.
isValid(); ++PSetI) {
1706 DiffSetPressure[*PSetI] -= PSetI.
getWeight();
1712 if (!
Reg.isVirtual())
1715 for (; PSetI.
isValid(); ++PSetI) {
1716 DiffSetPressure[*PSetI] += PSetI.
getWeight();
1720 return DiffSetPressure;
1726SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1727 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1730 std::vector<SIScheduleBlock*> ScheduledBlocks;
1733 ScheduledBlocks =
Scheduler.getBlocks();
1736 std::vector<SUnit*>
SUs =
Block->getScheduledUnits();
1760void SIScheduleDAGMI::topologicalSort() {
1773void SIScheduleDAGMI::moveLowLatencies() {
1774 unsigned DAGSize =
SUnits.size();
1775 int LastLowLatencyUser = -1;
1776 int LastLowLatencyPos = -1;
1778 for (
unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1780 bool IsLowLatencyUser =
false;
1781 unsigned MinPos = 0;
1786 IsLowLatencyUser =
true;
1790 unsigned PredPos = ScheduledSUnitsInv[Pred->
NodeNum];
1791 if (PredPos >= MinPos)
1792 MinPos = PredPos + 1;
1796 unsigned BestPos = LastLowLatencyUser + 1;
1797 if ((
int)BestPos <= LastLowLatencyPos)
1798 BestPos = LastLowLatencyPos + 1;
1799 if (BestPos < MinPos)
1802 for (
unsigned u = i;
u > BestPos; --
u) {
1803 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1804 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1806 ScheduledSUnits[BestPos] = SU->
NodeNum;
1807 ScheduledSUnitsInv[SU->
NodeNum] = BestPos;
1809 LastLowLatencyPos = BestPos;
1810 if (IsLowLatencyUser)
1811 LastLowLatencyUser = BestPos;
1812 }
else if (IsLowLatencyUser) {
1813 LastLowLatencyUser = i;
1817 bool CopyForLowLat =
false;
1823 CopyForLowLat =
true;
1829 for (
unsigned u = i;
u > MinPos; --
u) {
1830 ++ScheduledSUnitsInv[ScheduledSUnits[
u-1]];
1831 ScheduledSUnits[
u] = ScheduledSUnits[
u-1];
1833 ScheduledSUnits[MinPos] = SU->
NodeNum;
1834 ScheduledSUnitsInv[SU->
NodeNum] = MinPos;
1841 for (
unsigned i = 0, e =
SUnits.size(); i != e; ++i) {
1842 SUnits[i].isScheduled =
false;
1843 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1844 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1845 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1846 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1851template<
typename _Iterator>
void
1853 unsigned &VgprUsage,
unsigned &SgprUsage) {
1856 for (_Iterator RegI =
First; RegI !=
End; ++RegI) {
1859 if (!
Reg.isVirtual())
1862 for (; PSetI.
isValid(); ++PSetI) {
1863 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1865 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1897 SUnitsLinksBackup =
SUnits;
1906 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1912 bool OffsetIsScalable;
1913 if (SITII->getMemOperandWithOffset(*SU->
getInstr(), BaseLatOp, OffLatReg,
1914 OffsetIsScalable,
TRI))
1939 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1940 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1960 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1961 Temp =
Scheduler.scheduleVariant(v.first, v.second);
1967 ScheduledSUnits = Best.
SUs;
1968 ScheduledSUnitsInv.resize(
SUnits.size());
1970 for (
unsigned i = 0, e = (
unsigned)
SUnits.size(); i != e; ++i) {
1971 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1981 for (
unsigned I : ScheduledSUnits) {
1995 dbgs() <<
"*** Final schedule for "
unsigned const MachineRegisterInfo * MRI
Provides AMDGPU specific target descriptions.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DenseMap< Block *, BlockRelaxAux > Blocks
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Interface definition for SIInstrInfo.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isDefBetween(unsigned Reg, SlotIndex First, SlotIndex Last, const MachineRegisterInfo *MRI, const LiveIntervals *LIS)
static const char * getReasonStr(SIScheduleCandReason Reason)
static bool hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU)
SI Machine Scheduler interface.
GenericScheduler shrinks the unscheduled zone using heuristics to balance the schedule.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
PSetIterator getPressureSets(Register RegUnit) const
Get an iterator over the pressure sets affected by the given physical or virtual register.
Iterate over the pressure sets affected by the given physical or virtual register.
unsigned getWeight() const
Track the current register pressure at some position in the instruction stream, and remember the high...
void setPos(MachineBasicBlock::const_iterator Pos)
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
void closeTop()
Set the boundary for the top of the region and summarize live ins.
void advance()
Advance across the current instruction.
void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Wrapper class representing virtual and physical registers.
@ Data
Regular data dependence (aka true-dependence).
bool isWeak() const
Tests if this a weak dependence.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
static bool isEXP(const MachineInstr &MI)
bool isHighLatencyDef(int Opc) const override
bool isLowLatencyInstruction(const MachineInstr &MI) const
bool isSUInBlock(SUnit *SU, unsigned ID)
SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
SIScheduleBlocks getBlocks(SISchedulerBlockCreatorVariant BlockVariant)
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG, SISchedulerBlockSchedulerVariant Variant, SIScheduleBlocks BlocksStruct)
ArrayRef< std::pair< SIScheduleBlock *, SIScheduleBlockLinkKind > > getSuccs() const
void addPred(SIScheduleBlock *Pred)
void printDebug(bool Full)
void addSucc(SIScheduleBlock *Succ, SIScheduleBlockLinkKind Kind)
void schedule(MachineBasicBlock::iterator BeginBlock, MachineBasicBlock::iterator EndBlock)
void addUnit(SUnit *SU)
Functions for Block construction.
bool isHighLatencyBlock()
void restoreSULinksLeft()
std::vector< int > BottomUpIndex2SU
std::vector< unsigned > IsHighLatencySU
std::vector< unsigned > LowLatencyOffset
std::vector< int > TopDownIndex2SU
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGTopologicalSort * GetTopo()
void fillVgprSgprCost(_Iterator First, _Iterator End, unsigned &VgprUsage, unsigned &SgprUsage)
MachineRegisterInfo * getMRI()
SIScheduleDAGMI(MachineSchedContext *C)
MachineBasicBlock::iterator getCurrentBottom()
std::vector< unsigned > IsLowLatencySU
MachineBasicBlock::iterator getCurrentTop()
std::set< unsigned > getInRegs()
MachineBasicBlock * getBB()
void initRPTracker(RegPressureTracker &RPTracker)
std::set< unsigned > getOutRegs()
~SIScheduleDAGMI() override
const TargetRegisterInfo * getTRI()
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
void dumpNode(const SUnit &SU) const override
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
void dump() const override
RegPressureTracker TopRPTracker
void dumpSchedule() const
dump the scheduled Sequence.
std::unique_ptr< MachineSchedStrategy > SchedImpl
void postProcessDAG()
Apply each ScheduleDAGMutation step in order.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
MachineBasicBlock::iterator CurrentTop
The top of the unscheduled zone.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
reverse_iterator rbegin()
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SlotIndex - An opaque wrapper around machine indexes.
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual unsigned getNumRegPressureSets() const =0
Get the number of dimensions of register pressure.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
static bool tryGreater(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
static bool tryLess(int TryVal, int CandVal, SISchedulerCandidate &TryCand, SISchedulerCandidate &Cand, SIScheduleCandReason Reason)
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > PrintDAGs
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
SISchedulerBlockSchedulerVariant
cl::opt< bool > ViewMISchedDAGs
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SISchedulerBlockCreatorVariant
@ LatenciesAlonePlusConsecutive
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
auto max_element(R &&Range)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
std::vector< unsigned > SUs
std::vector< int > TopDownIndex2Block
std::vector< SIScheduleBlock * > Blocks
std::vector< int > TopDownBlock2Index
SIScheduleCandReason Reason
void setRepeat(SIScheduleCandReason R)