46#define DEBUG_TYPE "machine-scheduler"
51 "amdgpu-disable-unclustered-high-rp-reschedule",
cl::Hidden,
52 cl::desc(
"Disable unclustered high register pressure "
53 "reduction scheduling stage."),
57 "amdgpu-disable-clustered-low-occupancy-reschedule",
cl::Hidden,
58 cl::desc(
"Disable clustered low occupancy "
59 "rescheduling for ILP scheduling stage."),
65 "Sets the bias which adds weight to occupancy vs latency. Set it to "
66 "100 to chase the occupancy only."),
71 cl::desc(
"Relax occupancy targets for kernels which are memory "
72 "bound (amdgpu-membound-threshold), or "
73 "Wave Limited (amdgpu-limit-wave-threshold)."),
78 cl::desc(
"Use the AMDGPU specific RPTrackers during scheduling"),
82 "amdgpu-scheduler-pending-queue-limit",
cl::Hidden,
84 "Max (Available+Pending) size to inspect pending queue (0 disables)"),
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88#define DUMP_MAX_REG_PRESSURE
90 "amdgpu-print-max-reg-pressure-regusage-before-scheduler",
cl::Hidden,
91 cl::desc(
"Print a list of live registers along with their def/uses at the "
92 "point of maximum register pressure before scheduling."),
96 "amdgpu-print-max-reg-pressure-regusage-after-scheduler",
cl::Hidden,
97 cl::desc(
"Print a list of live registers along with their def/uses at the "
98 "point of maximum register pressure after scheduling."),
103 "amdgpu-disable-rewrite-mfma-form-sched-stage",
cl::Hidden,
108struct VGPRThresholdParser :
public cl::parser<unsigned> {
111 bool parse(cl::Option &O, StringRef ArgName, StringRef Arg,
unsigned &
Value) {
113 return O.error(
"'" + Arg +
"' value invalid for uint argument!");
116 return O.error(
"'" + Arg +
"' value must be in the range [0, 100]!");
126 cl::desc(
"Percent of VGPR limits that we should use as RP threshold "
127 "during scheduling. We have two limits relevant to scheduling: "
128 "Critical (avoid decreasing occupancy), Excess (avoid spilling). "
129 "This flag scales both limits back by an equal percent: (0 = use "
130 " default calculation, 1-100 = use percentage), default: 0"),
150 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
152 Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
174 "VGPRCriticalLimit calculation method.\n");
178 unsigned Addressable =
181 VGPRBudget = std::max(VGPRBudget, Granule);
194 <<
". VGPRCriticalLimit: " << OriginalVGPRCriticalLimit
234 if (!
Op.isReg() ||
Op.isImplicit())
236 if (
Op.getReg().isPhysical() ||
237 (
Op.isDef() &&
Op.getSubReg() != AMDGPU::NoSubRegister))
272 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
280 if (!Zone.
isTop() || !SU)
297 if (NextAvail > CurrCycle)
298 Stall = std::max(
Stall, NextAvail - CurrCycle);
318 unsigned SGPRPressure,
319 unsigned VGPRPressure,
bool IsBottomUp) {
323 if (!
DAG->isTrackingPressure())
346 Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
347 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;
349 for (
const auto &Diff :
DAG->getPressureDiff(SU)) {
355 (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
358#ifdef EXPENSIVE_CHECKS
359 std::vector<unsigned> CheckPressure, CheckMaxPressure;
362 if (
Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
363 CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
364 Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
365 CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
366 errs() <<
"Register Pressure is inaccurate when calculated through "
368 <<
"SGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::SReg_32]
370 << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] <<
"\n"
371 <<
"VGPR got " <<
Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
373 << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] <<
"\n";
379 unsigned NewSGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
380 unsigned NewVGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
390 const unsigned MaxVGPRPressureInc = 16;
391 bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >=
VGPRExcessLimit;
392 bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >=
SGPRExcessLimit;
423 if (SGPRDelta >= 0 || VGPRDelta >= 0) {
425 if (SGPRDelta > VGPRDelta) {
439 bool HasBufferedModel =
458 dbgs() <<
"Prefer:\t\t";
459 DAG->dumpNode(*Preferred.
SU);
463 DAG->dumpNode(*Current.
SU);
466 dbgs() <<
"Reason:\t\t";
480 unsigned SGPRPressure = 0;
481 unsigned VGPRPressure = 0;
483 if (
DAG->isTrackingPressure()) {
485 SGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::SReg_32];
486 VGPRPressure =
Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
491 SGPRPressure =
T->getPressure().getSGPRNum();
492 VGPRPressure =
T->getPressure().getArchVGPRNum();
497 for (
SUnit *SU : AQ) {
501 VGPRPressure, IsBottomUp);
521 for (
SUnit *SU : PQ) {
525 VGPRPressure, IsBottomUp);
545 bool &PickedPending) {
565 bool BotPending =
false;
585 "Last pick result should correspond to re-picking right now");
590 bool TopPending =
false;
610 "Last pick result should correspond to re-picking right now");
620 PickedPending = BotPending && TopPending;
623 if (BotPending || TopPending) {
630 Cand.setBest(TryCand);
635 IsTopNode = Cand.AtTop;
642 if (
DAG->top() ==
DAG->bottom()) {
644 Bot.Available.empty() &&
Bot.Pending.empty() &&
"ReadyQ garbage");
650 PickedPending =
false;
684 if (ReadyCycle > CurrentCycle)
756 if (
DAG->isTrackingPressure() &&
762 if (
DAG->isTrackingPressure() &&
767 bool SameBoundary = Zone !=
nullptr;
791 if (IsLegacyScheduler)
810 if (
DAG->isTrackingPressure() &&
820 bool SameBoundary = Zone !=
nullptr;
855 bool CandIsClusterSucc =
857 bool TryCandIsClusterSucc =
859 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
864 if (
DAG->isTrackingPressure() &&
870 if (
DAG->isTrackingPressure() &&
916 if (
DAG->isTrackingPressure()) {
932 bool CandIsClusterSucc =
934 bool TryCandIsClusterSucc =
936 if (
tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
945 bool SameBoundary = Zone !=
nullptr;
962 if (TryMayLoad || CandMayLoad) {
963 bool TryLongLatency =
965 bool CandLongLatency =
969 Zone->
isTop() ? CandLongLatency : TryLongLatency, TryCand,
987 if (
DAG->isTrackingPressure() &&
1006 !
Rem.IsAcyclicLatencyLimited &&
tryLatency(TryCand, Cand, *Zone))
1024 StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
1025 RegionLiveOuts(this,
true) {
1031 LLVM_DEBUG(
dbgs() <<
"Starting occupancy is " << StartingOccupancy <<
".\n");
1033 MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
1034 if (MinOccupancy != StartingOccupancy)
1035 LLVM_DEBUG(
dbgs() <<
"Allowing Occupancy drops to " << MinOccupancy
1040std::unique_ptr<GCNSchedStage>
1042 switch (SchedStageID) {
1044 return std::make_unique<OccInitialScheduleStage>(SchedStageID, *
this);
1046 return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *
this);
1048 return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *
this);
1050 return std::make_unique<ClusteredLowOccStage>(SchedStageID, *
this);
1052 return std::make_unique<PreRARematStage>(SchedStageID, *
this);
1054 return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *
this);
1056 return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
1070GCNScheduleDAGMILive::getRealRegPressure(
unsigned RegionIdx)
const {
1071 if (Regions[RegionIdx].first == Regions[RegionIdx].second)
1075 &LiveIns[RegionIdx]);
1081 assert(RegionBegin != RegionEnd &&
"Region must not be empty");
1085void GCNScheduleDAGMILive::computeBlockPressure(
unsigned RegionIdx,
1097 const MachineBasicBlock *OnlySucc =
nullptr;
1100 if (!Candidate->empty() && Candidate->pred_size() == 1) {
1101 SlotIndexes *Ind =
LIS->getSlotIndexes();
1103 OnlySucc = Candidate;
1108 size_t CurRegion = RegionIdx;
1109 for (
size_t E = Regions.size(); CurRegion !=
E; ++CurRegion)
1110 if (Regions[CurRegion].first->getParent() !=
MBB)
1115 auto LiveInIt = MBBLiveIns.find(
MBB);
1116 auto &Rgn = Regions[CurRegion];
1118 if (LiveInIt != MBBLiveIns.end()) {
1119 auto LiveIn = std::move(LiveInIt->second);
1121 MBBLiveIns.erase(LiveInIt);
1124 auto LRS = BBLiveInMap.lookup(NonDbgMI);
1125#ifdef EXPENSIVE_CHECKS
1134 if (Regions[CurRegion].first ==
I || NonDbgMI ==
I) {
1135 LiveIns[CurRegion] =
RPTracker.getLiveRegs();
1139 if (Regions[CurRegion].second ==
I) {
1140 Pressure[CurRegion] =
RPTracker.moveMaxPressure();
1141 if (CurRegion-- == RegionIdx)
1143 auto &Rgn = Regions[CurRegion];
1156 MBBLiveIns[OnlySucc] =
RPTracker.moveLiveRegs();
1161GCNScheduleDAGMILive::getRegionLiveInMap()
const {
1162 assert(!Regions.empty());
1163 std::vector<MachineInstr *> RegionFirstMIs;
1164 RegionFirstMIs.reserve(Regions.size());
1166 RegionFirstMIs.push_back(
1173GCNScheduleDAGMILive::getRegionLiveOutMap()
const {
1174 assert(!Regions.empty());
1175 std::vector<MachineInstr *> RegionLastMIs;
1176 RegionLastMIs.reserve(Regions.size());
1187 IdxToInstruction.clear();
1190 IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
1191 for (
unsigned I = 0;
I < DAG->Regions.size();
I++) {
1192 auto &[RegionBegin, RegionEnd] = DAG->Regions[
I];
1194 if (RegionBegin == RegionEnd)
1198 IdxToInstruction[
I] = RegionKey;
1206 LiveIns.resize(Regions.size());
1207 Pressure.resize(Regions.size());
1208 RegionsWithHighRP.resize(Regions.size());
1209 RegionsWithExcessRP.resize(Regions.size());
1210 RegionsWithIGLPInstrs.resize(Regions.size());
1211 RegionsWithHighRP.reset();
1212 RegionsWithExcessRP.reset();
1213 RegionsWithIGLPInstrs.reset();
1218void GCNScheduleDAGMILive::runSchedStages() {
1219 LLVM_DEBUG(
dbgs() <<
"All regions recorded, starting actual scheduling.\n");
1222 if (!Regions.
empty()) {
1223 BBLiveInMap = getRegionLiveInMap();
1228#ifdef DUMP_MAX_REG_PRESSURE
1238 if (!Stage->initGCNSchedStage())
1241 for (
auto Region : Regions) {
1245 if (!Stage->initGCNRegion()) {
1246 Stage->advanceRegion();
1252 const unsigned RegionIdx = Stage->getRegionIdx();
1255 MRI, RegionLiveOuts.getLiveRegsForRegionIdx(RegionIdx));
1259 Stage->finalizeGCNRegion();
1260 Stage->advanceRegion();
1264 Stage->finalizeGCNSchedStage();
1267#ifdef DUMP_MAX_REG_PRESSURE
1280 OS <<
"Max Occupancy Initial Schedule";
1283 OS <<
"Instruction Rewriting Reschedule";
1286 OS <<
"Unclustered High Register Pressure Reschedule";
1289 OS <<
"Clustered Low Occupancy Reschedule";
1292 OS <<
"Pre-RA Rematerialize";
1295 OS <<
"Max ILP Initial Schedule";
1298 OS <<
"Max memory clause Initial Schedule";
1318void RewriteMFMAFormStage::findReachingDefs(
1340 while (!Worklist.
empty()) {
1355 for (MachineBasicBlock *PredMBB : DefMBB->
predecessors()) {
1356 if (Visited.
insert(PredMBB).second)
1362void RewriteMFMAFormStage::findReachingUses(
1366 for (MachineOperand &UseMO :
1369 findReachingDefs(UseMO, LIS, ReachingDefIndexes);
1373 if (
any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
1385 if (!
ST.hasGFX90AInsts() ||
MFI.getMinWavesPerEU() > 1)
1388 RegionsWithExcessArchVGPR.resize(
DAG.Regions.size());
1389 RegionsWithExcessArchVGPR.reset();
1393 RegionsWithExcessArchVGPR[
Region] =
true;
1396 if (RegionsWithExcessArchVGPR.none())
1399 TII =
ST.getInstrInfo();
1400 SRI =
ST.getRegisterInfo();
1402 std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
1406 if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
1409 int64_t
Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
1416 return rewrite(RewriteCands);
1426 if (
DAG.RegionsWithHighRP.none() &&
DAG.RegionsWithExcessRP.none())
1433 InitialOccupancy =
DAG.MinOccupancy;
1436 TempTargetOccupancy =
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy
1437 ? InitialOccupancy + 1
1439 IsAnyRegionScheduled =
false;
1440 S.SGPRLimitBias =
S.HighRPSGPRBias;
1441 S.VGPRLimitBias =
S.HighRPVGPRBias;
1445 <<
"Retrying function scheduling without clustering. "
1446 "Aggressively try to reduce register pressure to achieve occupancy "
1447 << TempTargetOccupancy <<
".\n");
1462 if (
DAG.StartingOccupancy <=
DAG.MinOccupancy)
1466 dbgs() <<
"Retrying function scheduling with lowest recorded occupancy "
1467 <<
DAG.MinOccupancy <<
".\n");
1472#define REMAT_PREFIX "[PreRARemat] "
1473#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
1475#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1476Printable PreRARematStage::ScoredRemat::print()
const {
1478 OS <<
'(' << MaxFreq <<
", " << FreqDiff <<
", " << RegionImpact <<
')';
1493 auto PrintTargetRegions = [&]() ->
void {
1494 if (TargetRegions.none()) {
1499 for (
unsigned I : TargetRegions.set_bits())
1506 dbgs() <<
"Analyzing ";
1507 MF.getFunction().printAsOperand(
dbgs(),
false);
1510 if (!setObjective()) {
1511 LLVM_DEBUG(
dbgs() <<
"no objective to achieve, occupancy is maximal at "
1512 <<
MFI.getMaxWavesPerEU() <<
'\n');
1517 dbgs() <<
"increase occupancy from " << *TargetOcc - 1 <<
'\n';
1519 dbgs() <<
"reduce spilling (minimum target occupancy is "
1520 <<
MFI.getMinWavesPerEU() <<
")\n";
1522 PrintTargetRegions();
1527 DAG.RegionLiveOuts.buildLiveRegMap();
1529 if (!Remater.analyze()) {
1543 for (
unsigned RegIdx = 0, E = Remater.getNumRegs(); RegIdx < E; ++RegIdx) {
1547 unsigned NumUsers = 0;
1548 for (
const auto &[
_, RegionUses] : CandReg.
Uses)
1549 NumUsers += RegionUses.size();
1563 return MarkedRegs.contains(MO.getReg());
1570 SlotIndex UseIdx =
DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(
true);
1577 Cand.init(RegIdx, FreqInfo, Remater,
DAG);
1578 Cand.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1579 if (!Cand.hasNullScore())
1590 Rollback = std::make_unique<RollbackSupport>(Remater);
1597 RecomputeRP.
reset();
1600 sort(CandidateOrder, [&](
unsigned LHSIndex,
unsigned RHSIndex) {
1601 return Candidates[LHSIndex] < Candidates[RHSIndex];
1605 dbgs() <<
"==== NEW REMAT ROUND ====\n"
1607 <<
"Candidates with non-null score, in rematerialization order:\n";
1608 for (
const ScoredRemat &Cand :
reverse(Candidates)) {
1610 << Remater.printRematReg(Cand.RegIdx) <<
'\n';
1612 PrintTargetRegions();
1618 while (!CandidateOrder.
empty()) {
1619 const ScoredRemat &Cand = Candidates[CandidateOrder.
back()];
1628 if (!Cand.maybeBeneficial(TargetRegions, RPTargets)) {
1630 << Cand.print() <<
" | "
1631 << Remater.printRematReg(Cand.RegIdx));
1636#ifdef EXPENSIVE_CHECKS
1640 if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
1650 LM =
DAG.TRI->getSubRegIndexLaneMask(MO.getSubReg());
1652 const unsigned UseRegion = Reg.Uses.begin()->first;
1654 LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
1658 if (UncoveredLanes.
any()) {
1661 assert((SR.LaneMask & UncoveredLanes).none());
1668 REMAT_DEBUG(
dbgs() <<
"** REMAT " << Remater.printRematReg(Cand.RegIdx)
1670 removeFromLiveMaps(Reg.getDefReg(), Cand.LiveIn, Cand.LiveOut);
1672 Rollback->LiveMapUpdates.emplace_back(Cand.RegIdx, Cand.LiveIn,
1675 Cand.rematerialize(Remater);
1680 updateRPTargets(Cand.Live, Cand.RPSave);
1681 RecomputeRP |= Cand.UnpredictableRPSave;
1682 RescheduleRegions |= Cand.Live;
1683 if (!TargetRegions.any()) {
1689 if (!updateAndVerifyRPTargets(RecomputeRP) && !TargetRegions.any()) {
1698 unsigned NumUsefulCandidates = 0;
1699 for (
unsigned CandIdx : CandidateOrder) {
1700 ScoredRemat &Candidate = Candidates[CandIdx];
1701 Candidate.update(TargetRegions, RPTargets, FreqInfo, !TargetOcc);
1702 if (!Candidate.hasNullScore())
1703 CandidateOrder[NumUsefulCandidates++] = CandIdx;
1705 if (NumUsefulCandidates == 0) {
1706 REMAT_DEBUG(
dbgs() <<
"Stop on exhausted rematerialization candidates\n");
1709 CandidateOrder.truncate(NumUsefulCandidates);
1712 if (RescheduleRegions.none())
1718 unsigned DynamicVGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
1719 for (
unsigned I : RescheduleRegions.set_bits()) {
1720 DAG.Pressure[
I] = RPTargets[
I].getCurrentRP();
1722 <<
DAG.Pressure[
I].getOccupancy(
ST, DynamicVGPRBlockSize)
1723 <<
" (" << RPTargets[
I] <<
")\n");
1725 AchievedOcc =
MFI.getMaxWavesPerEU();
1728 std::min(AchievedOcc, RP.getOccupancy(
ST, DynamicVGPRBlockSize));
1732 dbgs() <<
"Retrying function scheduling with new min. occupancy of "
1733 << AchievedOcc <<
" from rematerializing (original was "
1734 <<
DAG.MinOccupancy;
1736 dbgs() <<
", target was " << *TargetOcc;
1740 DAG.setTargetOccupancy(getStageTargetOccupancy());
1751 S.SGPRLimitBias =
S.VGPRLimitBias = 0;
1752 if (
DAG.MinOccupancy > InitialOccupancy) {
1753 assert(IsAnyRegionScheduled);
1755 <<
" stage successfully increased occupancy to "
1756 <<
DAG.MinOccupancy <<
'\n');
1757 }
else if (!IsAnyRegionScheduled) {
1758 assert(
DAG.MinOccupancy == InitialOccupancy);
1760 <<
": No regions scheduled, min occupancy stays at "
1761 <<
DAG.MinOccupancy <<
", MFI occupancy stays at "
1762 <<
MFI.getOccupancy() <<
".\n");
1770 if (
DAG.begin() ==
DAG.end())
1777 unsigned NumRegionInstrs = std::distance(
DAG.begin(),
DAG.end());
1781 if (
DAG.begin() == std::prev(
DAG.end()))
1787 <<
"\n From: " << *
DAG.begin() <<
" To: ";
1789 else dbgs() <<
"End";
1790 dbgs() <<
" RegionInstrs: " << NumRegionInstrs <<
'\n');
1798 for (
auto &
I :
DAG) {
1811 dbgs() <<
"Pressure before scheduling:\nRegion live-ins:"
1813 <<
"Region live-in pressure: "
1817 S.HasHighPressure =
false;
1839 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1842 unsigned CurrentTargetOccupancy =
1843 IsAnyRegionScheduled ?
DAG.MinOccupancy : TempTargetOccupancy;
1845 (CurrentTargetOccupancy <= InitialOccupancy ||
1846 DAG.Pressure[
RegionIdx].getOccupancy(
ST, DynamicVGPRBlockSize) !=
1853 if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
1854 IsAnyRegionScheduled =
true;
1855 if (
MFI.getMaxWavesPerEU() >
DAG.MinOccupancy)
1856 DAG.setTargetOccupancy(TempTargetOccupancy);
1858 return IsSchedulingThisRegion;
1874 return !RevertAllRegions && RescheduleRegions[
RegionIdx] &&
1894 if (
S.HasHighPressure)
1915 if (
DAG.MinOccupancy < *TargetOcc) {
1917 <<
" cannot meet occupancy target, interrupting "
1918 "re-scheduling in all regions\n");
1919 RevertAllRegions =
true;
1930 unsigned DynamicVGPRBlockSize =
DAG.MFI.getDynamicVGPRBlockSize();
1941 unsigned TargetOccupancy = std::min(
1942 S.getTargetOccupancy(),
ST.getOccupancyWithWorkGroupSizes(
MF).second);
1943 unsigned WavesAfter = std::min(
1944 TargetOccupancy,
PressureAfter.getOccupancy(
ST, DynamicVGPRBlockSize));
1945 unsigned WavesBefore = std::min(
1947 LLVM_DEBUG(
dbgs() <<
"Occupancy before scheduling: " << WavesBefore
1948 <<
", after " << WavesAfter <<
".\n");
1954 unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
1958 if (WavesAfter < WavesBefore && WavesAfter <
DAG.MinOccupancy &&
1959 WavesAfter >=
MFI.getMinAllowedOccupancy()) {
1960 LLVM_DEBUG(
dbgs() <<
"Function is memory bound, allow occupancy drop up to "
1961 <<
MFI.getMinAllowedOccupancy() <<
" waves\n");
1962 NewOccupancy = WavesAfter;
1965 if (NewOccupancy <
DAG.MinOccupancy) {
1966 DAG.MinOccupancy = NewOccupancy;
1967 MFI.limitOccupancy(
DAG.MinOccupancy);
1969 <<
DAG.MinOccupancy <<
".\n");
1973 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
MF);
1976 unsigned MaxArchVGPRs = std::min(MaxVGPRs,
ST.getAddressableNumArchVGPRs());
1977 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
MF);
2001 unsigned ReadyCycle = CurrCycle;
2002 for (
auto &
D : SU.
Preds) {
2003 if (
D.isAssignedRegDep()) {
2006 unsigned DefReady = ReadyCycles[
DAG.getSUnit(
DefMI)->NodeNum];
2007 ReadyCycle = std::max(ReadyCycle, DefReady +
Latency);
2010 ReadyCycles[SU.
NodeNum] = ReadyCycle;
2017 std::pair<MachineInstr *, unsigned>
B)
const {
2018 return A.second <
B.second;
2024 if (ReadyCycles.empty())
2026 unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
2027 dbgs() <<
"\n################## Schedule time ReadyCycles for MBB : " << BBNum
2028 <<
" ##################\n# Cycle #\t\t\tInstruction "
2032 for (
auto &
I : ReadyCycles) {
2033 if (
I.second > IPrev + 1)
2034 dbgs() <<
"****************************** BUBBLE OF " <<
I.second - IPrev
2035 <<
" CYCLES DETECTED ******************************\n\n";
2036 dbgs() <<
"[ " <<
I.second <<
" ] : " << *
I.first <<
"\n";
2049 unsigned SumBubbles = 0;
2051 unsigned CurrCycle = 0;
2052 for (
auto &SU : InputSchedule) {
2053 unsigned ReadyCycle =
2055 SumBubbles += ReadyCycle - CurrCycle;
2057 ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
2059 CurrCycle = ++ReadyCycle;
2082 unsigned SumBubbles = 0;
2084 unsigned CurrCycle = 0;
2085 for (
auto &
MI :
DAG) {
2089 unsigned ReadyCycle =
2091 SumBubbles += ReadyCycle - CurrCycle;
2093 ReadyCyclesSorted.insert(std::make_pair(SU->
getInstr(), ReadyCycle));
2095 CurrCycle = ++ReadyCycle;
2112 if (WavesAfter <
DAG.MinOccupancy)
2116 if (
DAG.MFI.isDynamicVGPREnabled()) {
2118 ST,
DAG.MFI.getDynamicVGPRBlockSize(),
2122 if (BlocksAfter > BlocksBefore)
2159 <<
"\n\t *** In shouldRevertScheduling ***\n"
2160 <<
" *********** BEFORE UnclusteredHighRPStage ***********\n");
2164 <<
"\n *********** AFTER UnclusteredHighRPStage ***********\n");
2166 unsigned OldMetric = MBefore.
getMetric();
2167 unsigned NewMetric = MAfter.
getMetric();
2168 unsigned WavesBefore = std::min(
2169 S.getTargetOccupancy(),
2176 LLVM_DEBUG(
dbgs() <<
"\tMetric before " << MBefore <<
"\tMetric after "
2177 << MAfter <<
"Profit: " << Profit <<
"\n");
2208 unsigned WavesAfter) {
2215 LLVM_DEBUG(
dbgs() <<
"New pressure will result in more spilling.\n");
2227 "instruction number mismatch");
2228 if (MIOrder.
empty())
2241 if (MII != RegionEnd) {
2243 bool NonDebugReordered =
2244 !
MI->isDebugInstr() &&
2250 if (NonDebugReordered)
2251 DAG.LIS->handleMove(*
MI,
true);
2258 if (!
MI->isDebugInstr()) {
2260 SlotIndex PrevIdx =
DAG.LIS->getSlotIndexes()->getIndexBefore(*
MI);
2261 if (PrevIdx >= MIIdx)
2262 DAG.LIS->handleMove(*
MI,
true);
2266 if (
MI->isDebugInstr()) {
2273 Op.setIsUndef(
false);
2276 if (
DAG.ShouldTrackLaneMasks) {
2278 SlotIndex SlotIdx =
DAG.LIS->getInstructionIndex(*MI).getRegSlot();
2302 if (RD->
getOpcode() == AMDGPU::AV_MOV_B32_IMM_PSEUDO ||
2303 RD->
getOpcode() == AMDGPU::AV_MOV_B64_IMM_PSEUDO)
2310bool RewriteMFMAFormStage::hasUseRequiringVGPR(
2312 const SmallPtrSetImpl<MachineInstr *> &RewriteSet) {
2313 for (SlotIndex RDIdx : Src2ReachingDefs) {
2314 const MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2316 findReachingUses(RD,
DAG.LIS, ReachingUses);
2317 for (
const MachineOperand *UseMO : ReachingUses) {
2329void RewriteMFMAFormStage::resetRewriteCandsToVGPR(
2330 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands) {
2331 for (
auto [
MI, OriginalOpcode] : RewriteCands) {
2333 const TargetRegisterClass *ADefRC =
2334 DAG.MRI.getRegClass(
MI->getOperand(0).getReg());
2335 const TargetRegisterClass *VDefRC = SRI->getEquivalentVGPRClass(ADefRC);
2336 DAG.MRI.setRegClass(
MI->getOperand(0).getReg(), VDefRC);
2337 MI->setDesc(
TII->get(OriginalOpcode));
2339 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2346 const TargetRegisterClass *AUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2347 const TargetRegisterClass *VUseRC = SRI->getEquivalentVGPRClass(AUseRC);
2348 DAG.MRI.setRegClass(Src2->
getReg(), VUseRC);
2352bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *
MI)
const {
2353 if (!
static_cast<const SIInstrInfo *
>(
DAG.TII)->isMAI(*
MI))
2358 Register DstReg =
MI->getOperand(0).getReg();
2359 for (
const MachineOperand &Use :
DAG.MRI.use_nodbg_operands(DstReg)) {
2360 if (!
TII->isMAI(*
Use.getParent()) && !
Use.getParent()->isCopy())
2366bool RewriteMFMAFormStage::initHeuristics(
2367 std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
2368 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2369 SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2374 SmallPtrSet<MachineInstr *, 16> RewriteSet;
2375 DenseSet<Register> CandSrc2Regs;
2376 for (MachineBasicBlock &
MBB :
MF) {
2377 for (MachineInstr &
MI :
MBB) {
2378 if (!isRewriteCandidate(&
MI))
2381 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2382 if (Src2 && Src2->
isReg())
2388 for (MachineBasicBlock &
MBB :
MF) {
2389 for (MachineInstr &
MI :
MBB) {
2390 if (!isRewriteCandidate(&
MI))
2394 assert(ReplacementOp != -1);
2396 RewriteCands.push_back({&
MI,
MI.getOpcode()});
2397 MI.setDesc(
TII->get(ReplacementOp));
2399 MachineOperand *Src2 =
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
2400 if (Src2->
isReg()) {
2402 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2406 bool Src2NeedsVGPR = hasUseRequiringVGPR(Src2ReachingDefs, RewriteSet);
2407 Src2NeedsVGPRCache[&
MI] = Src2NeedsVGPR;
2409 for (SlotIndex RDIdx : Src2ReachingDefs) {
2410 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIdx);
2411 if (!Src2NeedsVGPR &&
2418 MachineOperand &Dst =
MI.getOperand(0);
2421 findReachingUses(&
MI,
DAG.LIS, DstReachingUses);
2423 for (MachineOperand *RUOp : DstReachingUses) {
2424 MachineInstr *UserMI = RUOp->getParent();
2426 if (
TII->isMAI(*UserMI) && RewriteSet.
contains(UserMI))
2432 CopyForUse[UserMI->
getParent()].insert(RUOp->getReg());
2434 if (
TII->isMAI(*UserMI))
2438 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2440 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2441 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2442 if (
TII->isMAI(*RD))
2454 const TargetRegisterClass *VDefRC =
DAG.MRI.getRegClass(Dst.getReg());
2455 const TargetRegisterClass *ADefRC = SRI->getEquivalentAGPRClass(VDefRC);
2456 DAG.MRI.setRegClass(Dst.getReg(), ADefRC);
2457 if (Src2->
isReg()) {
2461 const TargetRegisterClass *VUseRC =
DAG.MRI.getRegClass(Src2->
getReg());
2462 const TargetRegisterClass *AUseRC = SRI->getEquivalentAGPRClass(VUseRC);
2463 DAG.MRI.setRegClass(Src2->
getReg(), AUseRC);
2472int64_t RewriteMFMAFormStage::getRewriteCost(
2473 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands,
2474 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
2475 const SmallPtrSetImpl<MachineInstr *> &CopyForDef) {
2476 MachineBlockFrequencyInfo *MBFI =
DAG.MBFI;
2478 int64_t BestSpillCost = 0;
2482 std::pair<unsigned, unsigned> MaxVectorRegs =
2483 ST.getMaxNumVectorRegs(
MF.getFunction());
2484 unsigned ArchVGPRThreshold = MaxVectorRegs.first;
2485 unsigned AGPRThreshold = MaxVectorRegs.second;
2486 unsigned CombinedThreshold =
ST.getMaxNumVGPRs(
MF);
2489 if (!RegionsWithExcessArchVGPR[Region])
2494 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2502 MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
2504 uint64_t BlockFreq =
2508 bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
2509 uint64_t RelativeFreq = EntryFreq && BlockFreq
2510 ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
2511 : BlockFreq / EntryFreq)
2516 int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
2519 if (RelativeFreqIsDenom)
2520 SpillCost /= (int64_t)RelativeFreq;
2522 SpillCost *= (int64_t)RelativeFreq;
2525 if (SpillCost > 0) {
2526 resetRewriteCandsToVGPR(RewriteCands);
2530 if (SpillCost < BestSpillCost)
2531 BestSpillCost = SpillCost;
2536 Cost = BestSpillCost;
2539 unsigned CopyCost = 0;
2543 for (MachineInstr *
DefMI : CopyForDef) {
2550 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(DefReg);
2555 for (
auto &[UseBlock, UseRegs] : CopyForUse) {
2560 const TargetRegisterClass *RC =
DAG.MRI.getRegClass(
UseReg);
2569 resetRewriteCandsToVGPR(RewriteCands);
2571 return Cost + CopyCost;
2574bool RewriteMFMAFormStage::rewrite(
2575 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands) {
2576 DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
2577 DenseMap<MachineInstr *, unsigned> LastMIToRegion;
2585 if (
Entry.second !=
Entry.first->getParent()->end())
2628 DenseSet<Register> RewriteRegs;
2631 DenseMap<Register, Register> RedefMap;
2633 DenseMap<Register, DenseSet<MachineOperand *>>
ReplaceMap;
2635 DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
2638 DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
2643 SmallPtrSet<MachineInstr *, 16> RewriteCandsSet;
2644 DenseSet<Register> RewriteSrc2Regs;
2645 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2647 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2648 if (Src2 && Src2->
isReg())
2652 for (
auto &[
MI, OriginalOpcode] : RewriteCands) {
2654 if (ReplacementOp == -1)
2656 MI->setDesc(
TII->get(ReplacementOp));
2659 MachineOperand *Src2 =
TII->getNamedOperand(*
MI, AMDGPU::OpName::src2);
2660 if (Src2->
isReg()) {
2667 findReachingDefs(*Src2,
DAG.LIS, Src2ReachingDefs);
2668 SmallSetVector<MachineInstr *, 8> Src2DefsReplace;
2672 bool Src2NeedsVGPR = Src2NeedsVGPRCache.lookup(
MI);
2674 for (SlotIndex RDIndex : Src2ReachingDefs) {
2675 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2676 if (!Src2NeedsVGPR &&
2680 Src2DefsReplace.
insert(RD);
2683 if (!Src2DefsReplace.
empty()) {
2684 auto RI = RedefMap.
find(Src2Reg);
2685 if (RI != RedefMap.
end()) {
2686 MappedReg = RI->second;
2689 const TargetRegisterClass *Src2RC =
DAG.MRI.getRegClass(Src2Reg);
2690 const TargetRegisterClass *VGPRRC =
2691 SRI->getEquivalentVGPRClass(Src2RC);
2694 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2695 RedefMap[Src2Reg] = MappedReg;
2700 for (MachineInstr *RD : Src2DefsReplace) {
2702 if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
2703 MachineInstrBuilder VGPRCopy =
2706 .
addDef(MappedReg, {}, 0)
2707 .addUse(Src2Reg, {}, 0);
2708 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2713 unsigned UpdateRegion = LastMIToRegion[RD];
2714 DAG.Regions[UpdateRegion].second = VGPRCopy;
2715 LastMIToRegion.
erase(RD);
2722 RewriteRegs.
insert(Src2Reg);
2732 MachineOperand *Dst = &
MI->getOperand(0);
2741 SmallVector<MachineInstr *, 8> DstUseDefsReplace;
2743 findReachingUses(
MI,
DAG.LIS, DstReachingUses);
2745 for (MachineOperand *RUOp : DstReachingUses) {
2746 MachineInstr *UserMI = RUOp->
getParent();
2748 if (
TII->isMAI(*UserMI) && RewriteCandsSet.
contains(UserMI))
2752 if (
find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.
end())
2756 if (
TII->isMAI(*UserMI))
2760 findReachingDefs(*RUOp,
DAG.LIS, DstUsesReachingDefs);
2762 for (SlotIndex RDIndex : DstUsesReachingDefs) {
2763 MachineInstr *RD =
DAG.LIS->getInstructionFromIndex(RDIndex);
2764 if (
TII->isMAI(*RD))
2769 if (
find(DstUseDefsReplace, RD) == DstUseDefsReplace.
end())
2774 if (!DstUseDefsReplace.
empty()) {
2775 auto RI = RedefMap.
find(DstReg);
2776 if (RI != RedefMap.
end()) {
2777 MappedReg = RI->second;
2780 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2781 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2784 MappedReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2785 RedefMap[DstReg] = MappedReg;
2790 for (MachineInstr *RD : DstUseDefsReplace) {
2792 if (ReachingDefCopyMap[DstReg].insert(RD).second) {
2793 MachineInstrBuilder VGPRCopy =
2796 .
addDef(MappedReg, {}, 0)
2797 .addUse(DstReg, {}, 0);
2798 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2802 auto LMI = LastMIToRegion.
find(RD);
2803 if (LMI != LastMIToRegion.
end()) {
2804 unsigned UpdateRegion = LMI->second;
2805 DAG.Regions[UpdateRegion].second = VGPRCopy;
2806 LastMIToRegion.
erase(RD);
2812 DenseSet<MachineOperand *> &DstRegSet =
ReplaceMap[DstReg];
2813 for (MachineOperand *RU : DstReachingUseCopies) {
2814 MachineBasicBlock *RUBlock = RU->getParent()->getParent();
2817 if (RUBlock !=
MI->getParent()) {
2824 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(DstReg);
2825 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2826 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2827 MachineInstr *UseInst = RU->getParent();
2828 MachineInstrBuilder VGPRCopy =
2831 .
addDef(NewUseReg, {}, 0)
2832 .addUse(DstReg, {}, 0);
2833 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2836 RU->setReg(NewUseReg);
2842 RewriteRegs.
insert(DstReg);
2852 std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
2853 for (RUBType RUBlockEntry : ReachingUseTracker) {
2854 using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
2855 for (RUDType RUDst : RUBlockEntry.second) {
2856 MachineOperand *OpBegin = *RUDst.second.begin();
2857 SlotIndex InstPt =
DAG.LIS->getInstructionIndex(*OpBegin->
getParent());
2860 for (MachineOperand *User : RUDst.second) {
2861 SlotIndex NewInstPt =
DAG.LIS->getInstructionIndex(*
User->getParent());
2866 const TargetRegisterClass *DstRC =
DAG.MRI.getRegClass(RUDst.first);
2867 const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
2868 Register NewUseReg =
DAG.MRI.createVirtualRegister(VGPRRC);
2869 MachineInstr *UseInst =
DAG.LIS->getInstructionFromIndex(InstPt);
2871 MachineInstrBuilder VGPRCopy =
2874 .
addDef(NewUseReg, {}, 0)
2875 .addUse(RUDst.first, {}, 0);
2876 DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
2880 auto FI = FirstMIToRegion.
find(UseInst);
2881 if (FI != FirstMIToRegion.
end()) {
2882 unsigned UpdateRegion = FI->second;
2883 DAG.Regions[UpdateRegion].first = VGPRCopy;
2884 FirstMIToRegion.
erase(UseInst);
2888 for (MachineOperand *User : RUDst.second) {
2889 User->setReg(NewUseReg);
2900 for (std::pair<Register, Register> NewDef : RedefMap) {
2905 for (MachineOperand *ReplaceOp :
ReplaceMap[OldReg])
2906 ReplaceOp->setReg(NewReg);
2910 for (
Register RewriteReg : RewriteRegs) {
2911 Register RegToRewrite = RewriteReg;
2914 auto RI = RedefMap.find(RewriteReg);
2915 if (RI != RedefMap.end())
2916 RegToRewrite = RI->second;
2918 const TargetRegisterClass *CurrRC =
DAG.MRI.getRegClass(RegToRewrite);
2919 const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
2921 DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
2925 DAG.LIS->reanalyze(
DAG.MF);
2927 RegionPressureMap LiveInUpdater(&
DAG,
false);
2928 LiveInUpdater.buildLiveRegMap();
2931 DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);
2938unsigned PreRARematStage::getStageTargetOccupancy()
const {
2939 return TargetOcc ? *TargetOcc :
MFI.getMinWavesPerEU();
2942bool PreRARematStage::setObjective() {
2946 unsigned MaxSGPRs =
ST.getMaxNumSGPRs(
F);
2947 unsigned MaxVGPRs =
ST.getMaxNumVGPRs(
F);
2948 bool HasVectorRegisterExcess =
false;
2949 for (
unsigned I = 0,
E =
DAG.Regions.size();
I !=
E; ++
I) {
2950 const GCNRegPressure &
RP =
DAG.Pressure[
I];
2951 GCNRPTarget &
Target = RPTargets.emplace_back(MaxSGPRs, MaxVGPRs,
MF, RP);
2953 TargetRegions.set(
I);
2954 HasVectorRegisterExcess |=
Target.hasVectorRegisterExcess();
2957 if (HasVectorRegisterExcess ||
DAG.MinOccupancy >=
MFI.getMaxWavesPerEU()) {
2960 TargetOcc = std::nullopt;
2964 TargetOcc =
DAG.MinOccupancy + 1;
2965 const unsigned VGPRBlockSize =
MFI.getDynamicVGPRBlockSize();
2966 MaxSGPRs =
ST.getMaxNumSGPRs(*TargetOcc,
false);
2967 MaxVGPRs =
ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
2968 for (
auto [
I, Target] :
enumerate(RPTargets)) {
2969 Target.setTarget(MaxSGPRs, MaxVGPRs);
2971 TargetRegions.set(
I);
2975 return TargetRegions.any();
2978bool PreRARematStage::ScoredRemat::maybeBeneficial(
2980 for (
unsigned I : TargetRegions.set_bits()) {
2981 if (Live[
I] && RPTargets[
I].isSaveBeneficial(RPSave))
2989 assert(
DAG.MLI &&
"MLI not defined in DAG");
2993 const unsigned NumRegions =
DAG.Regions.size();
2997 for (
unsigned I = 0;
I < NumRegions; ++
I) {
3001 if (BlockFreq && BlockFreq <
MinFreq)
3010 if (
MinFreq >= ScaleFactor * ScaleFactor) {
3012 Freq /= ScaleFactor;
3018void PreRARematStage::ScoredRemat::init(RegisterIdx RegIdx,
3022 this->RegIdx = RegIdx;
3023 const unsigned NumRegions =
DAG.Regions.size();
3024 LiveIn.resize(NumRegions);
3025 LiveOut.resize(NumRegions);
3026 Live.resize(NumRegions);
3027 UnpredictableRPSave.resize(NumRegions);
3031 assert(Reg.Uses.size() == 1 &&
"expected users in single region");
3032 const unsigned UseRegion = Reg.Uses.begin()->first;
3035 for (
unsigned I = 0, E = NumRegions;
I != E; ++
I) {
3036 if (
DAG.LiveIns[
I].contains(DefReg))
3038 if (
DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).contains(DefReg))
3043 if (!LiveIn[
I] || !LiveOut[
I] ||
I == UseRegion)
3044 UnpredictableRPSave.set(
I);
3053 int64_t DefOrMin = std::max(Freq.
Regions[Reg.DefRegion], Freq.
MinFreq);
3054 int64_t UseOrMax = Freq.
Regions[UseRegion];
3057 FreqDiff = DefOrMin - UseOrMax;
3060void PreRARematStage::ScoredRemat::update(
const BitVector &TargetRegions,
3062 const FreqInfo &FreqInfo,
3066 for (
unsigned I : TargetRegions.
set_bits()) {
3075 if (!NumRegsBenefit)
3079 RegionImpact += (UnpredictableRPSave[
I] ? 1 : 2) * NumRegsBenefit;
3083 if (UnpredictableRPSave[
I]) {
3088 MaxFreq = std::max(MaxFreq, Freq);
3093void PreRARematStage::ScoredRemat::rematerialize(
3094 Rematerializer &Remater)
const {
3095 const Rematerializer::Reg &
Reg = Remater.getReg(RegIdx);
3096 Rematerializer::DependencyReuseInfo DRI;
3097 for (
const Rematerializer::Reg::Dependency &Dep :
Reg.Dependencies)
3099 unsigned UseRegion =
Reg.Uses.begin()->first;
3100 Remater.rematerializeToRegion(RegIdx, UseRegion, DRI);
3103void PreRARematStage::updateRPTargets(
const BitVector &Regions,
3104 const GCNRegPressure &RPSave) {
3106 RPTargets[
I].saveRP(RPSave);
3107 if (TargetRegions[
I] && RPTargets[
I].satisfied()) {
3109 TargetRegions.reset(
I);
3114bool PreRARematStage::updateAndVerifyRPTargets(
const BitVector &Regions) {
3115 bool TooOptimistic =
false;
3117 GCNRPTarget &
Target = RPTargets[
I];
3123 if (!TargetRegions[
I] && !
Target.satisfied()) {
3125 TooOptimistic =
true;
3126 TargetRegions.set(
I);
3129 return TooOptimistic;
3132void PreRARematStage::removeFromLiveMaps(
Register Reg,
const BitVector &LiveIn,
3133 const BitVector &LiveOut) {
3135 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3139 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).erase(
Reg);
3142void PreRARematStage::addToLiveMaps(
Register Reg, LaneBitmask Mask,
3143 const BitVector &LiveIn,
3144 const BitVector &LiveOut) {
3146 LiveOut.
size() ==
DAG.Regions.size() &&
"region num mismatch");
3147 std::pair<Register, LaneBitmask> LiveReg(
Reg, Mask);
3149 DAG.LiveIns[
I].insert(LiveReg);
3151 DAG.RegionLiveOuts.getLiveRegsForRegionIdx(
I).insert(LiveReg);
3163 if (
DAG.MinOccupancy >= *TargetOcc)
3167 for (
const auto &[
RegionIdx, OrigMIOrder, MaxPressure] : RegionReverts) {
3177 if (AchievedOcc >= *TargetOcc) {
3178 DAG.setTargetOccupancy(AchievedOcc);
3183 DAG.setTargetOccupancy(*TargetOcc - 1);
3188 assert(Rollback &&
"rollbacker should be defined");
3189 Rollback->Listener.rollback(Remater);
3190 for (
const auto &[RegIdx, LiveIn, LiveOut] : Rollback->LiveMapUpdates) {
3191 const Rematerializer::Reg &
Reg = Remater.getReg(RegIdx);
3192 addToLiveMaps(
Reg.getDefReg(),
Reg.Mask, LiveIn, LiveOut);
3195#ifdef EXPENSIVE_CHECKS
3200 for (
unsigned I : RescheduleRegions.set_bits())
3201 DAG.Pressure[
I] =
DAG.getRealRegPressure(
I);
3206void GCNScheduleDAGMILive::setTargetOccupancy(
unsigned TargetOccupancy) {
3207 MinOccupancy = TargetOccupancy;
3208 if (
MFI.getOccupancy() < TargetOccupancy)
3209 MFI.increaseOccupancy(
MF, MinOccupancy);
3211 MFI.limitOccupancy(MinOccupancy);
3228 if (HasIGLPInstrs) {
3229 SavedMutations.clear();
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static SUnit * pickOnlyChoice(SchedBoundary &Zone)
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the GCNRegPressure class, which tracks register pressure by bookkeeping number of S...
static cl::opt< bool > GCNTrackers("amdgpu-use-amdgpu-trackers", cl::Hidden, cl::desc("Use the AMDGPU specific RPTrackers during scheduling"), cl::init(false))
static cl::opt< bool > DisableClusteredLowOccupancy("amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden, cl::desc("Disable clustered low occupancy " "rescheduling for ILP scheduling stage."), cl::init(false))
#define REMAT_PREFIX
Makes it easy to filter for this stage's debug output.
static cl::opt< unsigned, false, VGPRThresholdParser > VGPRThresholdPercentOpt("amdgpu-vgpr-threshold-percent", cl::Hidden, cl::desc("Percent of VGPR limits that we should use as RP threshold " "during scheduling. We have two limits relevant to scheduling: " "Critical (avoid decreasing occupancy), Excess (avoid spilling). " "This flag scales both limits back by an equal percent: (0 = use " " default calculation, 1-100 = use percentage), default: 0"), cl::init(0))
static MachineInstr * getLastMIForRegion(MachineBasicBlock::iterator RegionBegin, MachineBasicBlock::iterator RegionEnd)
static bool shouldCheckPending(SchedBoundary &Zone, const TargetSchedModel *SchedModel)
static cl::opt< bool > RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden, cl::desc("Relax occupancy targets for kernels which are memory " "bound (amdgpu-membound-threshold), or " "Wave Limited (amdgpu-limit-wave-threshold)."), cl::init(false))
static cl::opt< bool > DisableUnclusterHighRP("amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden, cl::desc("Disable unclustered high register pressure " "reduction scheduling stage."), cl::init(false))
static void printScheduleModel(std::set< std::pair< MachineInstr *, unsigned >, EarlierIssuingCycle > &ReadyCycles)
static bool isReachingDefAGPRForm(MachineInstr *RD, const SmallPtrSetImpl< MachineInstr * > &RewriteSet, const DenseSet< Register > &CandSrc2Regs, const SIInstrInfo &TII)
Returns true if reaching def RD will be in AGPR form after the rewrite and so needs no bridge copy: a...
static cl::opt< bool > PrintMaxRPRegUsageAfterScheduler("amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure after scheduling."), cl::init(false))
static bool hasIGLPInstrs(ScheduleDAGInstrs *DAG)
static cl::opt< bool > DisableRewriteMFMAFormSchedStage("amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden, cl::desc("Disable rewrite mfma rewrite scheduling stage"), cl::init(true))
static bool canUsePressureDiffs(const SUnit &SU)
Checks whether SU can use the cached DAG pressure diffs to compute the current register pressure.
static cl::opt< unsigned > PendingQueueLimit("amdgpu-scheduler-pending-queue-limit", cl::Hidden, cl::desc("Max (Available+Pending) size to inspect pending queue (0 disables)"), cl::init(256))
static cl::opt< bool > PrintMaxRPRegUsageBeforeScheduler("amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden, cl::desc("Print a list of live registers along with their def/uses at the " "point of maximum register pressure before scheduling."), cl::init(false))
static cl::opt< unsigned > ScheduleMetricBias("amdgpu-schedule-metric-bias", cl::Hidden, cl::desc("Sets the bias which adds weight to occupancy vs latency. Set it to " "100 to chase the occupancy only."), cl::init(10))
static Register UseReg(const MachineOperand &MO)
const HexagonInstrInfo * TII
static constexpr std::pair< StringLiteral, StringLiteral > ReplaceMap[]
A common definition of LaneBitmask for use in TableGen and CodeGen.
static llvm::Error parse(GsymDataExtractor &Data, uint64_t BaseAddr, LineEntryCallback const &Callback)
Promote Memory to Register
static constexpr unsigned SM(unsigned Version)
MIR-level target-independent rematerialization helpers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
Get the first element.
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
BitVector & reset()
Reset all bits in the bitvector.
iterator_range< const_set_bits_iterator > set_bits() const
size_type size() const
Returns the number of bits in this bitvector.
uint64_t getFrequency() const
Returns the frequency as a fixpoint number scaled by the entry frequency.
bool initGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool initGCNRegion() override
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
bool reset(const MachineInstr &MI, MachineBasicBlock::const_iterator End, const LiveRegSet *LiveRegs=nullptr)
Reset tracker to the point before the MI filling LiveRegs upon this point using LIS.
GCNRegPressure bumpDownwardPressure(const MachineInstr *MI, const SIRegisterInfo *TRI) const
Mostly copy/paste from CodeGen/RegisterPressure.cpp. Calculate the impact MI will have on CurPressure ...
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
Models a register pressure target, allowing to evaluate and track register savings against that targe...
unsigned getNumRegsBenefit(const GCNRegPressure &SaveRP) const
Returns the benefit towards achieving the RP target that saving SaveRP represents,...
GCNRegPressure getPressure() const
virtual bool initGCNRegion()
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
This is a minimal scheduler strategy.
GCNDownwardRPTracker DownwardTracker
bool useGCNTrackers() const
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
unsigned SGPRCriticalLimit
std::vector< unsigned > MaxPressure
bool hasNextStage() const
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
unsigned VGPRCriticalLimit
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
void recede(const MachineInstr &MI)
Move to the state of RP just before the MI .
void reset(const MachineInstr &MI)
Resets tracker to the point just after MI (in program order), which can be a debug instruction.
void traceCandidate(const SchedCandidate &Cand)
LLVM_ABI void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
MachineSchedPolicy RegionPolicy
const TargetSchedModel * SchedModel
const MachineSchedContext * Context
const TargetRegisterInfo * TRI
SchedCandidate BotCand
Candidate last picked from Bot boundary.
SchedCandidate TopCand
Candidate last picked from Top boundary.
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
A live range for subregisters.
LiveInterval - This class represents the liveness of a register, or stack slot.
bool hasSubRanges() const
Returns true if subregister liveness information is available.
iterator_range< subrange_iterator > subranges()
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
LLVM_ABI void dump() const
MachineBasicBlock * getMBBFromIndex(SlotIndex index) const
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
succ_iterator succ_begin()
unsigned succ_size() const
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate machine basic b...
LLVM_ABI BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const
getblockFreq - Return block frequency.
LLVM_ABI BlockFrequency getEntryFreq() const
Divide a block's BlockFrequency::getFrequency() value by this value to obtain the entry block - relat...
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
filtered_mop_range all_uses()
Returns an iterator range over all operands that are (explicit or implicit) register uses.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNRegion() override
bool initGCNSchedStage() override
Capture a change in pressure for a single pressure set.
Simple wrapper around std::function<void(raw_ostream&)>.
Helpers for implementing custom MachineSchedStrategy classes.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void advance()
Advance across the current instruction.
LLVM_ABI void getDownwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction top-down.
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
LLVM_ABI void getUpwardPressure(const MachineInstr *MI, std::vector< unsigned > &PressureResult, std::vector< unsigned > &MaxPressureResult)
Get the pressure of each PSet after traversing this instruction bottom-up.
List of registers defined and used by a machine instruction.
LLVM_ABI void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
LLVM_ABI void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the VReg...
LLVM_ABI void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
MIR-level target-independent rematerializer.
bool isIGLPMutationOnly(unsigned Opcode) const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getOccupancy() const
unsigned getDynamicVGPRBlockSize() const
unsigned getMinAllowedOccupancy() const
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
unsigned NodeNum
Entry # of node in the node vector.
unsigned short Latency
Node latency.
bool isScheduled
True once scheduled.
unsigned ParentClusterIdx
The parent cluster id.
unsigned BotReadyCycle
Cycle relative to end when node is ready.
bool hasReservedResource
Uses a reserved resource.
bool isBottomReady() const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Each Scheduling boundary is associated with ready queues.
LLVM_ABI void releasePending()
Release pending ready nodes in to the available queue.
LLVM_ABI unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
LLVM_ABI SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
ScheduleHazardRecognizer * HazardRec
LLVM_ABI void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LLVM_ABI bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
LLVM_ABI std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned ReleaseAtCycle, unsigned AcquireAtCycle)
Compute the next cycle at which the given processor resource can be scheduled.
A ScheduleDAG for scheduling lists of MachineInstr.
bool ScheduleSingleMIRegions
True if regions with a single MI should be scheduled.
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
const MachineLoopInfo * MLI
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
RegPressureTracker RPTracker
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
MachineFunction & MF
Machine function.
static const unsigned ScaleFactor
unsigned getMetric() const
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
SlotIndex - An opaque wrapper around machine indexes.
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
static bool isEarlierInstr(SlotIndex A, SlotIndex B)
isEarlierInstr - Return true if A refers to an instruction earlier than B.
SlotIndex getPrevSlot() const
Returns the previous slot in the index list.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
uint8_t getCopyCost() const
Return the cost of copying a value between two registers in this class.
Provide an instruction scheduling machine model to CodeGen passes.
LLVM_ABI bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned getMicroOpBufferSize() const
Number of micro-ops that may be buffered for OOO execution.
bool initGCNSchedStage() override
bool initGCNRegion() override
void finalizeGCNSchedStage() override
bool shouldRevertScheduling(unsigned WavesAfter) override
VNInfo - Value Number Information.
SlotIndex def
The index of the defining instruction.
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
static LLVM_ABI bool allUsesAvailableAt(const MachineInstr *MI, SlotIndex UseIdx, const LiveIntervals &LIS, const MachineRegisterInfo &MRI, const TargetInstrInfo &TII)
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
LLVM_READONLY int32_t getMFMASrcCVDstAGPROp(uint32_t Opcode)
@ C
The default llvm calling convention, compatible with C.
This namespace contains all of the command line option processing machinery.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI int biasPhysReg(const SUnit *SU, bool isTop, bool BiasPRegsExtra=false)
Minimize physical register live ranges.
FunctionAddr VTableAddr Value
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI unsigned getWeakLeft(const SUnit *SU, bool isTop)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
GCNRegPressure getRegPressure(const MachineRegisterInfo &MRI, Range &&LiveRegs)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< ScheduleDAGMutation > createIGroupLPDAGMutation(AMDGPU::SchedulingPhase Phase)
Phase specifies whether or not this is a reentry into the IGroupLPDAGMutation.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
@ UnclusteredHighRPReschedule
@ MemoryClauseInitialSchedule
@ ClusteredLowOccupancyReschedule
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
LLVM_ABI cl::opt< bool > VerifyScheduling
LLVM_ABI bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isTheSameCluster(unsigned A, unsigned B)
Return whether the input cluster IDs are the same and valid.
DWARFExpression::Operation Op
LLVM_ABI bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
DenseMap< MachineInstr *, GCNRPTracker::LiveRegSet > getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS)
Creates a map MachineInstr -> LiveRegSet. R - range of iterators on instructions. After - upon entry or...
GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS)
LLVM_ABI bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
LLVM_ABI void dumpMaxRegPressure(MachineFunction &MF, GCNRegPressure::RegKind Kind, LiveIntervals &LIS, const MachineLoopInfo *MLI)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Implement std::hash so that hash_code can be used in STL containers.
bool operator()(std::pair< MachineInstr *, unsigned > A, std::pair< MachineInstr *, unsigned > B) const
unsigned getArchVGPRNum() const
unsigned getAGPRNum() const
unsigned getSGPRNum() const
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
void setBest(SchedCandidate &Best)
void reset(const CandPolicy &NewPolicy)
LLVM_ABI void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
SchedResourceDelta ResDelta
Status of an instruction's critical resource consumption.
unsigned DemandedResources
constexpr bool any() const
static constexpr LaneBitmask getNone()
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)
PressureChange CriticalMax
PressureChange CurrentMax
DependencyReuseInfo & reuse(RegisterIdx DepIdx)
RegisterIdx RegIdx
The corresponding register's index in the rematerializer.
A rematerializable register defined by a single machine instruction.
MachineInstr * DefMI
Single MI defining the rematerializable register.
SmallDenseMap< unsigned, RegionUsers, 2 > Uses
Uses of the register, mapped by region.
Register getDefReg() const
Returns the rematerializable register from its defining instruction.