45#define DEBUG_TYPE "machine-scheduler"
50 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
51 cl::desc(
"Disable unclustered high register pressure "
52 "reduction scheduling stage."),
56 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
57 cl::desc(
"Disable clustered low occupancy "
58 "rescheduling for ILP scheduling stage."),
64 "Sets the bias which adds weight to occupancy vs latency. Set it to "
65 "100 to chase the occupancy only."),
70 cl::desc(
"Relax occupancy targets for kernels which are memory "
71 "bound (amdgpu-membound-threshold), or "
72 "Wave Limited (amdgpu-limit-wave-threshold)."),
77 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
81 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
83 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
86#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
87#define DUMP_MAX_REG_PRESSURE
89 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
90 cl::desc(
"Print a list of live registers along with their def/uses at the "
91 "point of maximum register pressure before scheduling."),
95 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
96 cl::desc(
"Print a list of live registers along with their def/uses at the "
97 "point of maximum register pressure after scheduling."),
102 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
122 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
124 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
146 "VGPRCriticalLimit calculation method.\n");
150 unsigned Addressable =
153 VGPRBudget = std::max(VGPRBudget, Granule);
191 if (!
Op.isReg() ||
Op.isImplicit())
193 if (
Op.getReg().isPhysical() ||
194 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
229 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
237 if (!Zone.
isTop() || !SU)
254 if (NextAvail > CurrCycle)
255 Stall = std::max(
Stall, NextAvail - CurrCycle);
275 unsigned SGPRPressure,
276 unsigned VGPRPressure,
bool IsBottomUp) {
280 if (!
DAG->isTrackingPressure())
303 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
304 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
306 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
312 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
315#ifdef EXPENSIVE_CHECKS
316 std::vector<unsigned> CheckPressure, CheckMaxPressure;
319 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
320 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
321 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
322 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
323 errs() <<
"Register Pressure is inaccurate when calculated through "
325 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
327 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
328 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
330 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
336 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
337 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
347 const unsigned MaxVGPRPressureInc = 16;
348 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
349 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
380 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
382 if (SGPRDelta > VGPRDelta) {
396 bool HasBufferedModel =
415 dbgs() <<
"Prefer:\t\t";
416 DAG->dumpNode(*Preferred.
SU);
420 DAG->dumpNode(*Current.
SU);
423 dbgs() <<
"Reason:\t\t";
437 unsigned SGPRPressure = 0;
438 unsigned VGPRPressure = 0;
440 if (
DAG->isTrackingPressure()) {
442 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
443 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
448 SGPRPressure =
T->getPressure().getSGPRNum();
449 VGPRPressure =
T->getPressure().getArchVGPRNum();
454 for (
SUnit *SU : AQ) {
458 VGPRPressure, IsBottomUp);
478 for (
SUnit *SU : PQ) {
482 VGPRPressure, IsBottomUp);
502 bool &PickedPending) {
522 bool BotPending =
false;
542 "Last pick result should correspond to re-picking right now");
547 bool TopPending =
false;
567 "Last pick result should correspond to re-picking right now");
577 PickedPending = BotPending && TopPending;
580 if (BotPending || TopPending) {
587 Cand.setBest(TryCand);
592 IsTopNode = Cand.AtTop;
599 if (
DAG->top() ==
DAG->bottom()) {
601 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
607 PickedPending =
false;
641 if (ReadyCycle > CurrentCycle)
713 if (
DAG->isTrackingPressure() &&
719 if (
DAG->isTrackingPressure() &&
724 bool SameBoundary = Zone !=
nullptr;
748 if (IsLegacyScheduler)
767 if (
DAG->isTrackingPressure() &&
777 bool SameBoundary = Zone !=
nullptr;
812 bool CandIsClusterSucc =
814 bool TryCandIsClusterSucc =
816 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
821 if (
DAG->isTrackingPressure() &&
827 if (
DAG->isTrackingPressure() &&
873 if (
DAG->isTrackingPressure()) {
889 bool CandIsClusterSucc =
891 bool TryCandIsClusterSucc =
893 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
902 bool SameBoundary = Zone !=
nullptr;
919 if (TryMayLoad || CandMayLoad) {
920 bool TryLongLatency =
922 bool CandLongLatency =
926 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
944 if (
DAG->isTrackingPressure() &&
963 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
981 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
982 RegionLiveOuts(this,
true) {
988 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
990 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
991 if (MinOccupancy != StartingOccupancy)
992 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
997std::unique_ptr<GCNSchedStage>
999 switch (SchedStageID) {
1001 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
1003 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
1005 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
1007 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
1009 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
1011 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
1013 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
1027GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
1028 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
1032 &LiveIns[RegionIdx]);
1038 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1042void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1054 const MachineBasicBlock *OnlySucc =
nullptr;
1057 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1058 SlotIndexes *Ind =
LIS->getSlotIndexes();
1060 OnlySucc = Candidate;
1065 size_t CurRegion = RegionIdx;
1066 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1067 if (Regions[CurRegion].first->getParent() !=
MBB)
1072 auto LiveInIt = MBBLiveIns.find(
MBB);
1073 auto &Rgn = Regions[CurRegion];
1075 if (LiveInIt != MBBLiveIns.end()) {
1076 auto LiveIn = std::move(LiveInIt->second);
1078 MBBLiveIns.erase(LiveInIt);
1081 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1082#ifdef EXPENSIVE_CHECKS
1091 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1092 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1096 if (Regions[CurRegion].second ==
I) {
1097 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1098 if (CurRegion-- == RegionIdx)
1100 auto &Rgn = Regions[CurRegion];
1113 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1118GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1119 assert(!Regions.empty());
1120 std::vector<MachineInstr *> RegionFirstMIs;
1121 RegionFirstMIs.reserve(Regions.size());
1123 RegionFirstMIs.push_back(
1130GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1131 assert(!Regions.empty());
1132 std::vector<MachineInstr *> RegionLastMIs;
1133 RegionLastMIs.reserve(Regions.size());
1144 IdxToInstruction.clear();
1147 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1148 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1149 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1151 if (RegionBegin == RegionEnd)
1155 IdxToInstruction[
I] = RegionKey;
1163 LiveIns.resize(Regions.size());
1164 Pressure.resize(Regions.size());
1165 RegionsWithHighRP.resize(Regions.size());
1166 RegionsWithExcessRP.resize(Regions.size());
1167 RegionsWithIGLPInstrs.resize(Regions.size());
1168 RegionsWithHighRP.reset();
1169 RegionsWithExcessRP.reset();
1170 RegionsWithIGLPInstrs.reset();
1175void GCNScheduleDAGMILive::runSchedStages() {
1176 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1179 if (!Regions.
empty()) {
1180 BBLiveInMap = getRegionLiveInMap();
1185#ifdef DUMP_MAX_REG_PRESSURE
1195 if (!Stage->initGCNSchedStage())
1198 for (
auto Region : Regions) {
1202 if (!Stage->initGCNRegion()) {
1203 Stage->advanceRegion();
1212 &LiveIns[Stage->getRegionIdx()];
1214 reinterpret_cast<GCNRPTracker *
>(DownwardTracker)
1215 ->reset(
MRI, *RegionLiveIns);
1216 reinterpret_cast<GCNRPTracker *
>(UpwardTracker)
1217 ->reset(
MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
1218 Stage->getRegionIdx()));
1222 Stage->finalizeGCNRegion();
1223 Stage->advanceRegion();
1227 Stage->finalizeGCNSchedStage();
1230#ifdef DUMP_MAX_REG_PRESSURE
1243 OS <<
"Max Occupancy Initial Schedule";
1246 OS <<
"Instruction Rewriting Reschedule";
1249 OS <<
"Unclustered High Register Pressure Reschedule";
1252 OS <<
"Clustered Low Occupancy Reschedule";
1255 OS <<
"Pre-RA Rematerialize";
1258 OS <<
"Max ILP Initial Schedule";
1261 OS <<
"Max memory clause Initial Schedule";
1281void RewriteMFMAFormStage::findReachingDefs(
1303 while (!Worklist.
empty()) {
1318 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1319 if (Visited.
insert(PredMBB).second)
1325void RewriteMFMAFormStage::findReachingUses(
1329 for (MachineOperand &UseMO :
1332 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1336 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1348 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1351 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1352 RegionsWithExcessArchVGPR.reset();
1356 RegionsWithExcessArchVGPR[
Region] =
true;
1359 if (RegionsWithExcessArchVGPR.none())
1362 TII =
ST.getInstrInfo();
1363 SRI =
ST.getRegisterInfo();
1365 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1369 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1372 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1379 return rewrite(RewriteCands);
1389 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1396 InitialOccupancy =
DAG.MinOccupancy;
1399 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1400 ? InitialOccupancy + 1
1402 IsAnyRegionScheduled =
false;
1403 S.SGPRLimitBias =
S.HighRPSGPRBias;
1404 S.VGPRLimitBias =
S.HighRPVGPRBias;
1408 <<
"Retrying function scheduling without clustering. "
1409 "Aggressively try to reduce register pressure to achieve occupancy "
1410 << TempTargetOccupancy <<
".\n");
1425 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1429 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1430 <<
DAG.MinOccupancy <<
".\n");
1435#define REMAT_PREFIX "[PreRARemat] "
1436#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1439Printable PreRARematStage::ScoredRemat::print()
const {
1441 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1462 const unsigned NumRegions =
DAG.Regions.size();
1463 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1473 auto PrintTargetRegions = [&]() ->
void {
1474 if (TargetRegions.none()) {
1479 for (
unsigned I : TargetRegions.set_bits())
1482 auto PrintCandidate = [&](
const ScoredRemat &Cand) ->
Printable {
1486 const RematReg &Remat = *Cand.Remat;
1487 bool HasLiveThroughRegion =
false;
1488 OS <<
'[' << Remat.DefRegion <<
" -";
1489 for (
unsigned I = 0;
I < NumRegions; ++
I) {
1490 if (!Cand.UnpredictableRPSave[
I]) {
1491 if (HasLiveThroughRegion) {
1495 HasLiveThroughRegion =
true;
1500 if (HasLiveThroughRegion)
1502 OS <<
"-> " << Remat.UseRegion <<
"] ";
1503 Remat.DefMI->
print(OS,
true,
false,
1511 dbgs() <<
"Analyzing ";
1512 MF.getFunction().printAsOperand(
dbgs(),
false);
1515 if (!setObjective()) {
1516 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1517 <<
MFI.getMaxWavesPerEU() <<
'\n');
1522 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1524 dbgs() <<
"reduce spilling (minimum target occupancy is "
1525 <<
MFI.getMinWavesPerEU() <<
")\n";
1527 PrintTargetRegions();
1532 if (!collectRematRegs(MIRegion)) {
1539 for (
auto [
I, Remat] :
enumerate(RematRegs)) {
1540 ScoredRemat &Candidate = Candidates[
I];
1541 Candidate.init(&Remat, FreqInfo,
DAG);
1542 Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1543 if (!Candidate.hasNullScore())
1548 dbgs() <<
"Rematerializable registers:\n";
1549 for (
const ScoredRemat &Cand : Candidates)
1557 dbgs() <<
"unknown ";
1558 dbgs() <<
" | " << *
DAG.Regions[
I].first;
1566 RecomputeRP.reset();
1569 sort(CandidateOrder, [&](
unsigned LHSIndex,
unsigned RHSIndex) {
1570 return Candidates[LHSIndex] < Candidates[RHSIndex];
1574 dbgs() <<
"==== NEW REMAT ROUND ====\n"
1576 <<
"Candidates with non-null score, in rematerialization order:\n";
1577 for (
const ScoredRemat &Cand :
reverse(Candidates)) {
1579 << PrintCandidate(Cand) <<
'\n';
1581 PrintTargetRegions();
1587 while (!CandidateOrder.
empty()) {
1588 const ScoredRemat &Cand = Candidates[CandidateOrder.
back()];
1595 if (!Cand.maybeBeneficial(TargetRegions, RPTargets)) {
1597 << Cand.print() <<
" | " << *Cand.Remat->DefMI);
1601 RematReg &Remat = *Cand.Remat;
1606 removeFromLiveMaps(Remat.getReg(), Cand.LiveIn, Cand.LiveOut);
1616 RollbackInfo &Rollback =
1617 Rollbacks.emplace_back(&Remat, Cand.LiveIn, Cand.LiveOut);
1618 Rollback.RematMI = RematMI;
1624 Remat.DefMI->
setDesc(
DAG.TII->get(TargetOpcode::DBG_VALUE));
1626 if (MO.isReg() && MO.readsReg()) {
1627 Rollback.RegMap.insert({Idx, MO.getReg()});
1633 DAG.deleteMI(Remat.DefRegion, Remat.DefMI);
1639 updateRPTargets(Cand.Live, Cand.RPSave);
1640 RecomputeRP |= Cand.UnpredictableRPSave;
1641 RescheduleRegions |= Cand.Live;
1642 if (!TargetRegions.any()) {
1648 if (!updateAndVerifyRPTargets(RecomputeRP) && !TargetRegions.any()) {
1657 unsigned NumUsefulCandidates = 0;
1658 for (
unsigned CandIdx : CandidateOrder) {
1659 ScoredRemat &Candidate = Candidates[CandIdx];
1660 Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1661 if (!Candidate.hasNullScore())
1662 CandidateOrder[NumUsefulCandidates++] = CandIdx;
1664 if (NumUsefulCandidates == 0) {
1665 REMAT_DEBUG(
dbgs() <<
"Stop on exhausted rematerialization candidates\n");
1668 CandidateOrder.truncate(NumUsefulCandidates);
1671 if (RescheduleRegions.none())
1677 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1678 for (
unsigned I : RescheduleRegions.set_bits()) {
1679 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1681 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1682 <<
" (" << RPTargets[
I] <<
")\n");
1684 AchievedOcc =
MFI.getMaxWavesPerEU();
1687 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1691 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1692 << AchievedOcc <<
" from rematerializing (original was "
1693 <<
DAG.MinOccupancy;
1695 dbgs() <<
", target was " << *TargetOcc;
1699 DAG.setTargetOccupancy(getStageTargetOccupancy());
1710 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1711 if (
DAG.MinOccupancy > InitialOccupancy) {
1712 assert(IsAnyRegionScheduled);
1714 <<
" stage successfully increased occupancy to "
1715 <<
DAG.MinOccupancy <<
'\n');
1716 }
else if (!IsAnyRegionScheduled) {
1717 assert(
DAG.MinOccupancy == InitialOccupancy);
1719 <<
": No regions scheduled, min occupancy stays at "
1720 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1721 <<
MFI.getOccupancy() <<
".\n");
1729 if (
DAG.begin() ==
DAG.end())
1736 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1740 if (
DAG.begin() == std::prev(
DAG.end()))
1746 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1748 else dbgs() <<
"End";
1749 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1757 for (
auto &
I :
DAG) {
1770 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1772 <<
"Region live-in pressure: "
1776 S.HasHighPressure =
false;
1798 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1801 unsigned CurrentTargetOccupancy =
1802 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1804 (CurrentTargetOccupancy <= InitialOccupancy ||
1805 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1812 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1813 IsAnyRegionScheduled =
true;
1814 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1815 DAG.setTargetOccupancy(TempTargetOccupancy);
1817 return IsSchedulingThisRegion;
1833 return !RevertAllRegions && RescheduleRegions[
RegionIdx] &&
1853 if (
S.HasHighPressure)
1874 if (
DAG.MinOccupancy < *TargetOcc) {
1876 <<
" cannot meet occupancy target, interrupting "
1877 "re-scheduling in all regions\n");
1878 RevertAllRegions =
true;
1889 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1900 unsigned TargetOccupancy = std::min(
1901 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1902 unsigned WavesAfter = std::min(
1903 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1904 unsigned WavesBefore = std::min(
1906 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1907 <<
", after " << WavesAfter <<
".\n");
1913 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1917 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1918 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1919 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1920 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1921 NewOccupancy = WavesAfter;
1924 if (NewOccupancy <
DAG.MinOccupancy) {
1925 DAG.MinOccupancy = NewOccupancy;
1926 MFI.limitOccupancy(
DAG.MinOccupancy);
1928 <<
DAG.MinOccupancy <<
".\n");
1932 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1935 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1936 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
1960 unsigned ReadyCycle = CurrCycle;
1961 for (
auto &
D : SU.
Preds) {
1962 if (
D.isAssignedRegDep()) {
1965 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
1966 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
1969 ReadyCycles[SU.
NodeNum] = ReadyCycle;
1976 std::pair<MachineInstr *, unsigned>
B)
const {
1977 return A.second <
B.second;
1983 if (ReadyCycles.empty())
1985 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
1986 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
1987 <<
" ##################\n# Cycle #\t\t\tInstruction "
1991 for (
auto &
I : ReadyCycles) {
1992 if (
I.second > IPrev + 1)
1993 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
1994 <<
" CYCLES DETECTED ******************************\n\n";
1995 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
2008 unsigned SumBubbles = 0;
2010 unsigned CurrCycle = 0;
2011 for (
auto &SU : InputSchedule) {
2012 unsigned ReadyCycle =
2014 SumBubbles += ReadyCycle - CurrCycle;
2016 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
2018 CurrCycle = ++ReadyCycle;
2041 unsigned SumBubbles = 0;
2043 unsigned CurrCycle = 0;
2044 for (
auto &
MI :
DAG) {
2048 unsigned ReadyCycle =
2050 SumBubbles += ReadyCycle - CurrCycle;
2052 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
2054 CurrCycle = ++ReadyCycle;
2071 if (WavesAfter <
DAG.MinOccupancy)
2075 if (
DAG.MFI.isDynamicVGPREnabled()) {
2077 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2080 &
ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2082 if (BlocksAfter > BlocksBefore)
2119 <<
"\n\t *** In shouldRevertScheduling ***\n"
2120 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2124 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2126 unsigned OldMetric = MBefore.
getMetric();
2127 unsigned NewMetric = MAfter.
getMetric();
2128 unsigned WavesBefore = std::min(
2129 S.getTargetOccupancy(),
2136 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2137 << MAfter <<
"Profit: " << Profit <<
"\n");
2168 unsigned WavesAfter) {
2175 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2187 "instruction number mismatch");
2188 if (MIOrder.
empty())
2201 if (MII != RegionEnd) {
2203 bool NonDebugReordered =
2204 !
MI->isDebugInstr() &&
2210 if (NonDebugReordered)
2211 DAG.LIS->handleMove(*
MI,
true);
2218 if (!
MI->isDebugInstr()) {
2220 SlotIndex PrevIdx =
DAG.LIS->getSlotIndexes()->getIndexBefore(*
MI);
2221 if (PrevIdx >= MIIdx)
2222 DAG.LIS->handleMove(*
MI,
true);
2226 if (
MI->isDebugInstr()) {
2233 Op.setIsUndef(
false);
2236 if (
DAG.ShouldTrackLaneMasks) {
2238 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2253bool RewriteMFMAFormStage::isRewriteCandidate(
MachineInstr *
MI)
const {
2260bool RewriteMFMAFormStage::initHeuristics(
2261 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2269 if (!isRewriteCandidate(&
MI))
2273 assert(ReplacementOp != -1);
2275 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2276 MI.setDesc(
TII->get(ReplacementOp));
2278 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2279 if (Src2->
isReg()) {
2281 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2285 for (SlotIndex RDIdx : Src2ReachingDefs) {
2286 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2287 if (!
TII->isMAI(*RD))
2292 MachineOperand &Dst =
MI.getOperand(0);
2295 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2297 for (MachineOperand *RUOp : DstReachingUses) {
2298 if (
TII->isMAI(*RUOp->getParent()))
2304 CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());
2307 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2309 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2310 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2311 if (
TII->isMAI(*RD))
2323 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2324 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2325 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2326 if (Src2->
isReg()) {
2330 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2331 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2332 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2341int64_t RewriteMFMAFormStage::getRewriteCost(
2342 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2343 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2344 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2345 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2347 int64_t BestSpillCost = 0;
2351 std::pair<unsigned, unsigned> MaxVectorRegs =
2352 ST.getMaxNumVectorRegs(
MF.getFunction());
2353 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2354 unsigned AGPRThreshold = MaxVectorRegs.second;
2355 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2358 if (!RegionsWithExcessArchVGPR[Region])
2363 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2371 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2373 uint64_t BlockFreq =
2377 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2378 uint64_t RelativeFreq = EntryFreq && BlockFreq
2379 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2380 : BlockFreq / EntryFreq)
2385 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2388 if (RelativeFreqIsDenom)
2389 SpillCost /= (int64_t)RelativeFreq;
2391 SpillCost *= (int64_t)RelativeFreq;
2397 if (SpillCost < BestSpillCost)
2398 BestSpillCost = SpillCost;
2403 Cost = BestSpillCost;
2406 unsigned CopyCost = 0;
2410 for (MachineInstr *
DefMI : CopyForDef) {
2417 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2422 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2427 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2436 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2438 const TargetRegisterClass *ADefRC =
2439 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2440 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2441 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2442 MI->setDesc(
TII->get(OriginalOpcode));
2444 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2452 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2453 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2454 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2457 return Cost + CopyCost;
2460bool RewriteMFMAFormStage::rewrite(
2461 const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
2462 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2463 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2471 if (
Entry.second !=
Entry.first->getParent()->end())
2514 DenseSet<Register> RewriteRegs;
2517 DenseMap<Register, Register> RedefMap;
2519 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2521 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2524 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2527 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2529 if (ReplacementOp == -1)
2531 MI->setDesc(
TII->get(ReplacementOp));
2534 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2535 if (Src2->
isReg()) {
2542 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2543 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2545 for (SlotIndex RDIndex : Src2ReachingDefs) {
2546 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2547 if (
TII->isMAI(*RD))
2551 Src2DefsReplace.
insert(RD);
2554 if (!Src2DefsReplace.
empty()) {
2556 if (RI != RedefMap.
end()) {
2557 MappedReg = RI->second;
2560 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2561 const TargetRegisterClass *VGPRRC =
2562 SRI->getEquivalentVGPRClass(Src2RC);
2565 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2566 RedefMap[Src2Reg] = MappedReg;
2571 for (MachineInstr *RD : Src2DefsReplace) {
2573 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2574 MachineInstrBuilder VGPRCopy =
2577 .
addDef(MappedReg, {}, 0)
2578 .addUse(Src2Reg, {}, 0);
2579 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2584 unsigned UpdateRegion = LastMIToRegion[RD];
2585 DAG.Regions[UpdateRegion].second = VGPRCopy;
2586 LastMIToRegion.
erase(RD);
2593 RewriteRegs.
insert(Src2Reg);
2603 MachineOperand *Dst = &
MI->getOperand(0);
2612 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2614 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2616 for (MachineOperand *RUOp : DstReachingUses) {
2617 if (
TII->isMAI(*RUOp->getParent()))
2621 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2624 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2626 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2627 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2628 if (
TII->isMAI(*RD))
2633 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2638 if (!DstUseDefsReplace.
empty()) {
2640 if (RI != RedefMap.
end()) {
2641 MappedReg = RI->second;
2644 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2645 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2648 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2649 RedefMap[DstReg] = MappedReg;
2654 for (MachineInstr *RD : DstUseDefsReplace) {
2656 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2657 MachineInstrBuilder VGPRCopy =
2660 .
addDef(MappedReg, {}, 0)
2661 .addUse(DstReg, {}, 0);
2662 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2667 LastMIToRegion.
find(RD);
2668 if (LMI != LastMIToRegion.
end()) {
2669 unsigned UpdateRegion = LMI->second;
2670 DAG.Regions[UpdateRegion].second = VGPRCopy;
2671 LastMIToRegion.
erase(RD);
2677 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2678 for (MachineOperand *RU : DstReachingUseCopies) {
2679 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2682 if (RUBlock !=
MI->getParent()) {
2689 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2690 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2691 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2692 MachineInstr *UseInst = RU->getParent();
2693 MachineInstrBuilder VGPRCopy =
2696 .
addDef(NewUseReg, {}, 0)
2697 .addUse(DstReg, {}, 0);
2698 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2701 RU->setReg(NewUseReg);
2707 RewriteRegs.
insert(DstReg);
2717 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2718 for (RUBType RUBlockEntry : ReachingUseTracker) {
2719 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2720 for (RUDType RUDst : RUBlockEntry.second) {
2721 MachineOperand *OpBegin = *RUDst.second.begin();
2722 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2725 for (MachineOperand *User : RUDst.second) {
2726 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2731 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2732 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2733 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2734 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2736 MachineInstrBuilder VGPRCopy =
2739 .
addDef(NewUseReg, {}, 0)
2740 .addUse(RUDst.first, {}, 0);
2741 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2746 FirstMIToRegion.
find(UseInst);
2747 if (FI != FirstMIToRegion.
end()) {
2748 unsigned UpdateRegion = FI->second;
2749 DAG.Regions[UpdateRegion].first = VGPRCopy;
2750 FirstMIToRegion.
erase(UseInst);
2754 for (MachineOperand *User : RUDst.second) {
2755 User->setReg(NewUseReg);
2766 for (std::pair<Register, Register> NewDef : RedefMap) {
2771 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2772 ReplaceOp->setReg(NewReg);
2776 for (
Register RewriteReg : RewriteRegs) {
2777 Register RegToRewrite = RewriteReg;
2781 if (RI != RedefMap.end())
2782 RegToRewrite = RI->second;
2784 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2785 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2787 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2791 DAG.LIS->reanalyze(
DAG.MF);
2793 RegionPressureMap LiveInUpdater(&
DAG,
false);
2794 LiveInUpdater.buildLiveRegMap();
2797 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2804unsigned PreRARematStage::getStageTargetOccupancy()
const {
2805 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2808bool PreRARematStage::setObjective() {
2812 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2813 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2814 bool HasVectorRegisterExcess =
false;
2815 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2816 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2817 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2819 TargetRegions.set(
I);
2820 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2823 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2826 TargetOcc = std::nullopt;
2830 TargetOcc =
DAG.MinOccupancy + 1;
2831 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2832 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2833 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2834 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2835 Target.setTarget(MaxSGPRs, MaxVGPRs);
2837 TargetRegions.set(
I);
2841 return TargetRegions.any();
2844bool PreRARematStage::collectRematRegs(
2845 const DenseMap<MachineInstr *, unsigned> &MIRegion) {
2848 DAG.RegionLiveOuts.buildLiveRegMap();
2852 SmallSet<Register, 4> MarkedRegs;
2853 auto IsMarkedForRemat = [&MarkedRegs](
const MachineOperand &MO) ->
bool {
2854 return MO.isReg() && MarkedRegs.
contains(MO.getReg());
2858 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2860 for (
auto MI = Bounds.first;
MI != Bounds.second; ++
MI) {
2863 if (!isReMaterializable(
DefMI))
2876 MachineInstr *
UseMI =
DAG.MRI.getOneNonDBGUser(
Reg);
2880 if (UseRegion == MIRegion.
end() || UseRegion->second ==
I)
2891 if (IsMarkedForRemat(UseMO) ||
2898 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
2909 return !RematRegs.empty();
2912PreRARematStage::RematReg::RematReg(
2913 MachineInstr *
DefMI, MachineInstr *
UseMI, GCNScheduleDAGMILive &
DAG,
2914 const DenseMap<MachineInstr *, unsigned> &MIRegion)
2916 UseRegion(MIRegion.at(
UseMI)),
2917 Mask(
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(DefRegion).at(
getReg())) {
2920bool PreRARematStage::ScoredRemat::maybeBeneficial(
2922 for (
unsigned I : TargetRegions.
set_bits()) {
2923 if (Live[
I] && RPTargets[
I].isSaveBeneficial(RPSave))
2929void PreRARematStage::ScoredRemat::insertMI(
unsigned RegionIdx,
2930 MachineInstr *RematMI,
2931 GCNScheduleDAGMILive &
DAG)
const {
2934 Bounds.first = RematMI;
2935 DAG.LIS->InsertMachineInstrInMaps(*RematMI);
2941 assert(
DAG.MLI &&
"MLI not defined in DAG");
2945 const unsigned NumRegions =
DAG.Regions.size();
2949 for (
unsigned I = 0;
I < NumRegions; ++
I) {
2953 if (BlockFreq && BlockFreq <
MinFreq)
2962 if (
MinFreq >= ScaleFactor * ScaleFactor) {
2964 Freq /= ScaleFactor;
2970void PreRARematStage::ScoredRemat::init(RematReg *Remat,
const FreqInfo &Freq,
2972 this->Remat = Remat;
2973 const unsigned NumRegions =
DAG.Regions.size();
2974 LiveIn.resize(NumRegions);
2975 LiveOut.resize(NumRegions);
2976 Live.resize(NumRegions);
2977 UnpredictableRPSave.resize(NumRegions);
2981 for (
unsigned I = 0, E = NumRegions;
I != E; ++
I) {
2982 if (
DAG.LiveIns[
I].contains(DefReg))
2984 if (
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).contains(DefReg))
2989 if (!LiveIn[
I] || !LiveOut[
I] ||
I == Remat->UseRegion)
2990 UnpredictableRPSave.set(
I);
3003 int64_t DefOrMin = std::max(Freq.
Regions[Remat->DefRegion], Freq.
MinFreq);
3004 int64_t UseOrMax = Freq.
Regions[Remat->UseRegion];
3007 FreqDiff = DefOrMin - UseOrMax;
3010void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
3012 const FreqInfo &FreqInfo,
3016 for (
unsigned I : TargetRegions.
set_bits()) {
3025 if (!NumRegsBenefit)
3029 RegionImpact += (UnpredictableRPSave[
I] ? 1 : 2) * NumRegsBenefit;
3033 if (UnpredictableRPSave[
I]) {
3038 MaxFreq = std::max(MaxFreq, Freq);
3044PreRARematStage::ScoredRemat::rematerialize(GCNScheduleDAGMILive &
DAG)
const {
3045 const SIInstrInfo *
TII =
DAG.MF.getSubtarget<GCNSubtarget>().getInstrInfo();
3046 MachineInstr &
DefMI = *Remat->DefMI;
3052 TII->reMaterialize(*InsertPos->getParent(), InsertPos, NewReg, 0,
DefMI);
3053 MachineInstr *RematMI = &*std::prev(InsertPos);
3055 insertMI(Remat->UseRegion, RematMI,
DAG);
3057#ifdef EXPENSIVE_CHECKS
3060 for (MachineOperand &MO :
DefMI.operands()) {
3061 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
3068 LiveInterval &LI =
DAG.LIS->getInterval(
UseReg);
3069 LaneBitmask LM =
DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
3071 LM =
DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
3073 LaneBitmask LiveInMask =
DAG.LiveIns[Remat->UseRegion].at(
UseReg);
3074 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
3078 if (UncoveredLanes.
any()) {
3080 for (LiveInterval::SubRange &SR : LI.
subranges())
3081 assert((SR.LaneMask & UncoveredLanes).none());
3088void PreRARematStage::commitRematerializations()
const {
3090 for (
const RollbackInfo &Rollback : Rollbacks)
3091 DAG.deleteMI(Rollback.Remat->DefRegion, Rollback.Remat->DefMI);
3094void PreRARematStage::updateRPTargets(
const BitVector &Regions,
3095 const GCNRegPressure &RPSave) {
3097 RPTargets[
I].saveRP(RPSave);
3098 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3100 TargetRegions.reset(
I);
3105bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3106 bool TooOptimistic =
false;
3108 GCNRPTarget &
Target = RPTargets[
I];
3114 if (!TargetRegions[
I] && !
Target.satisfied()) {
3116 TooOptimistic =
true;
3117 TargetRegions.set(
I);
3120 return TooOptimistic;
3124bool PreRARematStage::isReMaterializable(
const MachineInstr &
MI) {
3125 if (!
DAG.TII->isReMaterializable(
MI))
3128 for (
const MachineOperand &MO :
MI.all_uses()) {
3131 if (MO.getReg().isPhysical()) {
3132 if (
DAG.MRI.isConstantPhysReg(MO.getReg()) ||
DAG.TII->isIgnorableUse(MO))
3141void PreRARematStage::removeFromLiveMaps(
Register Reg,
const BitVector &LiveIn,
3142 const BitVector &LiveOut) {
3144 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3148 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).erase(
Reg);
3151void PreRARematStage::addToLiveMaps(
Register Reg, LaneBitmask Mask,
3152 const BitVector &LiveIn,
3153 const BitVector &LiveOut) {
3155 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3156 std::pair<Register, LaneBitmask> LiveReg(
Reg, Mask);
3158 DAG.LiveIns[
I].insert(LiveReg);
3160 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).insert(LiveReg);
3172 if (
DAG.MinOccupancy >= *TargetOcc) {
3173 commitRematerializations();
3180 const bool ShouldRollbackRemats = AchievedOcc < *TargetOcc;
3185 if (ShouldRollbackRemats) {
3186 for (
const RollbackInfo &Rollback : Rollbacks) {
3187 const RematReg *Remat = Rollback.Remat;
3188 MachineInstr *RematMI = Rollback.RematMI;
3190 for (
const auto &[MOIdx,
Reg] : Rollback.RegMap)
3191 Remat->DefMI->getOperand(MOIdx).setReg(
Reg);
3196 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3203 if (!ShouldRollbackRemats) {
3204 commitRematerializations();
3205 DAG.setTargetOccupancy(AchievedOcc);
3210 DAG.setTargetOccupancy(*TargetOcc - 1);
3215 DenseSet<Register> RecomputeLI;
3216 for (
const RollbackInfo &Rollback : Rollbacks) {
3217 const RematReg *Remat = Rollback.Remat;
3218 MachineInstr *RematMI = Rollback.RematMI;
3223 Register OriginalReg = Remat->DefMI->getOperand(0).getReg();
3224 Remat->UseMI->substituteRegister(
Reg, OriginalReg, 0, *
DAG.TRI);
3226 <<
"] Deleting rematerialization " << *RematMI);
3227 DAG.deleteMI(Remat->UseRegion, RematMI);
3228 addToLiveMaps(OriginalReg, Remat->Mask, Rollback.LiveIn, Rollback.LiveOut);
3232 for (MachineOperand &MO : Rollback.Remat->DefMI->operands()) {
3233 if (MO.isReg() && MO.getReg().isVirtual())
3234 RecomputeLI.
insert(MO.getReg());
3238 DAG.LIS->removeInterval(
Reg);
3239 DAG.LIS->createAndComputeVirtRegInterval(
Reg);
3241#ifdef EXPENSIVE_CHECKS
3246 for (
unsigned I : RescheduleRegions.set_bits())
3247 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3252void GCNScheduleDAGMILive::deleteMI(
unsigned RegionIdx, MachineInstr *
MI) {
3259 MI->eraseFromParent();
3262void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3263 MinOccupancy = TargetOccupancy;
3264 if (
MFI.getOccupancy() < TargetOccupancy)
3265 MFI.increaseOccupancy(
MF, MinOccupancy);
3267 MFI.limitOccupancy(MinOccupancy);
3284 if (HasIGLPInstrs) {
3285 SavedMutations.clear();
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Allows to easily filter for this stage's debug output.
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static constexpr unsigned SM(unsigned Version)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
size - Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
DenseMap< unsigned, LaneBitmask > LiveRegSet
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
void removeInterval(Register Reg)
Interval removal.
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI void substituteRegister(Register FromReg, Register ToReg, unsigned SubIdx, const TargetRegisterInfo &RegInfo)
Replace all occurrences of FromReg with ToReg:SubIdx, properly composing subreg indices where necessa...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
RegionT * getParent() const
Get the parent of the Region.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
bool isIGLPMutationOnly(unsigned Opcode) const
static bool isMAI(const MCInstrDesc &Desc)
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop, bool BiasPRegsExtra=false)
Minimize physical register live ranges.
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifes whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R is a range of iterators on instructions; After selects the live set upon entry or exit of every instruction.
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax