#define DEBUG_TYPE "machine-scheduler"

static cl::opt<bool> DisableUnclusterHighRP(
    "amdgpu-disable-unclustered-high-rp-reschedule", cl::Hidden,
    cl::desc("Disable unclustered high register pressure "
             "reduction scheduling stage."),
    cl::init(false));

static cl::opt<bool> DisableClusteredLowOccupancy(
    "amdgpu-disable-clustered-low-occupancy-reschedule", cl::Hidden,
    cl::desc("Disable clustered low occupancy "
             "rescheduling for ILP scheduling stage."),
    cl::init(false));

static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

static cl::opt<bool> RelaxedOcc(
    "amdgpu-schedule-relaxed-occupancy", cl::Hidden,
    cl::desc("Relax occupancy targets for kernels which are memory "
             "bound (amdgpu-membound-threshold), or "
             "Wave Limited (amdgpu-limit-wave-threshold)."),
    cl::init(false));

static cl::opt<bool> GCNTrackers(
    "amdgpu-use-amdgpu-trackers", cl::Hidden,
    cl::desc("Use the AMDGPU specific RPTrackers during scheduling"),
    cl::init(false));

static cl::opt<unsigned> PendingQueueLimit(
    "amdgpu-scheduler-pending-queue-limit", cl::Hidden,
    cl::desc(
        "Max (Available+Pending) size to inspect pending queue (0 disables)"),
    cl::init(256));

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#define DUMP_MAX_REG_PRESSURE
static cl::opt<bool> PrintMaxRPRegUsageBeforeScheduler(
    "amdgpu-print-max-reg-pressure-regusage-before-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure before scheduling."),
    cl::init(false));

static cl::opt<bool> PrintMaxRPRegUsageAfterScheduler(
    "amdgpu-print-max-reg-pressure-regusage-after-scheduler", cl::Hidden,
    cl::desc("Print a list of live registers along with their def/uses at the "
             "point of maximum register pressure after scheduling."),
    cl::init(false));
#endif

static cl::opt<bool> DisableRewriteMFMAFormSchedStage(
    "amdgpu-disable-rewrite-mfma-form-sched-stage", cl::Hidden,
    cl::desc("Disable MFMA rewrite scheduling stage"), cl::init(true));
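// Illustrative usage, not part of the source: all of the options above are
// hidden cl::opts, so any LLVM tool that runs the machine scheduler accepts
// them, e.g.
//
//   llc -mtriple=amdgcn -mcpu=gfx90a \
//       -amdgpu-disable-unclustered-high-rp-reschedule \
//       -amdgpu-schedule-metric-bias=100 kernel.ll
//
// where a metric bias of 100 chases occupancy only (see ScheduleMetricBias
// above) and the default of 10 weighs latency much more heavily.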
  SGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
  VGPRExcessLimit =
      Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
  // ...
  LLVM_DEBUG(dbgs() << "Region is known to spill, use alternative "
                       "VGPRCriticalLimit calculation method.\n");
  // ...
  unsigned Granule = AMDGPU::getVGPRAllocGranule(&ST, DynamicVGPRBlockSize);
  unsigned Addressable =
      AMDGPU::getAddressableNumVGPRs(&ST, DynamicVGPRBlockSize);
  unsigned VGPRBudget = alignDown(Addressable / TargetOccupancy, Granule);
  VGPRBudget = std::max(VGPRBudget, Granule);
  // ...
/// Checks whether \p SU can use the cached DAG pressure diffs to compute the
/// current register pressure.
static bool canUsePressureDiffs(const SUnit &SU) {
  if (!SU.isInstr())
    return false;

  // Cannot use pressure diffs for subregister defs or with physregs, it's
  // imprecise in both cases.
  for (const auto &Op : SU.getInstr()->operands()) {
    if (!Op.isReg() || Op.isImplicit())
      continue;
    if (Op.getReg().isPhysical() ||
        (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister))
      return false;
  }
  return true;
}
static void getRegisterPressures(
    bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU,
    std::vector<unsigned> &Pressure, std::vector<unsigned> &MaxPressure,
    GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker,
    ScheduleDAGMI *DAG, const SIRegisterInfo *SRI) {
  // ...
  // Reserve 4 pressure-set slots.
  Pressure.resize(4, 0);
  // ... compute NewPressure via the GCN up/downward trackers, then:
  Pressure[AMDGPU::RegisterPressureSets::SReg_32] = NewPressure.getSGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::VGPR_32] =
      NewPressure.getArchVGPRNum();
  Pressure[AMDGPU::RegisterPressureSets::AGPR_32] = NewPressure.getAGPRNum();
}
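// A minimal sketch (names as used above) of how callers consume the result:
// the GCN trackers collapse register pressure into three pressure sets, so a
// caller reads them back by set index, e.g.
//
//   unsigned SGPRs = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
//   unsigned VGPRs = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
//   unsigned AGPRs = Pressure[AMDGPU::RegisterPressureSets::AGPR_32];
//
// which is exactly what initCandidate() below does.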
void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
                                     bool AtTop,
                                     const RegPressureTracker &RPTracker,
                                     const SIRegisterInfo *SRI,
                                     unsigned SGPRPressure,
                                     unsigned VGPRPressure, bool IsBottomUp) {
  Cand.SU = SU;
  Cand.AtTop = AtTop;

  if (!DAG->isTrackingPressure())
    return;
  // ...
    Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure;
    Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure;

    for (const auto &Diff : DAG->getPressureDiff(SU)) {
      if (!Diff.isValid())
        continue;
      // PressureDiffs is always bottom-up so if we're working top-down we
      // need to invert its sign.
      Pressure[Diff.getPSet()] +=
          (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc());
    }

#ifdef EXPENSIVE_CHECKS
    std::vector<unsigned> CheckPressure, CheckMaxPressure;
    getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure,
                         DownwardTracker, UpwardTracker, DAG, SRI);
    if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] ||
        Pressure[AMDGPU::RegisterPressureSets::VGPR_32] !=
            CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) {
      errs() << "Register Pressure is inaccurate when calculated through "
                "PressureDiff\n"
             << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n"
             << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32]
             << ", expected "
             << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n";
      report_fatal_error("inaccurate register pressure calculation");
    }
#endif

  unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
  unsigned NewVGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];

  // If two instructions increase the pressure of different register sets by
  // the same amount, the generic scheduler would prefer the one that touches
  // the smaller set (SGPRs), which is rarely what we want. So report
  // excess/critical pressure only for one of the two sets at a time.
  const unsigned MaxVGPRPressureInc = 16;
  bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
  bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
  // ...
  if (SGPRDelta >= 0 || VGPRDelta >= 0) {
    HasHighPressure = true;
    if (SGPRDelta > VGPRDelta) {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::SReg_32);
      Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
    } else {
      Cand.RPDelta.CriticalMax =
          PressureChange(AMDGPU::RegisterPressureSets::VGPR_32);
      Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
    }
  }
}
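// Worked example with illustrative numbers: with VGPRExcessLimit = 256 and
// MaxVGPRPressureInc = 16, a candidate evaluated at VGPRPressure = 244
// already satisfies 244 + 16 >= 256, so VGPR deltas are reported even though
// the limit has not been crossed yet. SGPR tracking is then suppressed on
// purpose: only one register set is reported as excess/critical at a time so
// the generic heuristics cannot trade VGPR pressure for SGPR pressure.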
static bool shouldCheckPending(SchedBoundary &Zone,
                               const TargetSchedModel *SchedModel) {
  bool HasBufferedModel =
      SchedModel->hasInstrSchedModel() && SchedModel->getMicroOpBufferSize();
  unsigned Size = Zone.Available.size() + Zone.Pending.size();
  return Size <= PendingQueueLimit && HasBufferedModel;
}
void GCNSchedStrategy::printCandidateDecision(const SchedCandidate &Current,
                                              const SchedCandidate &Preferred) {
  LLVM_DEBUG({
    dbgs() << "Prefer:\t\t";
    DAG->dumpNode(*Preferred.SU);
    if (Current.SU) {
      dbgs() << "Not:\t\t";
      DAG->dumpNode(*Current.SU);
    }
    dbgs() << "Reason:\t\t";
    traceCandidate(Preferred);
  });
}
void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
                                         const CandPolicy &ZonePolicy,
                                         const RegPressureTracker &RPTracker,
                                         SchedCandidate &Cand, bool &IsPending,
                                         bool IsBottomUp) {
  const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo *>(TRI);
  unsigned SGPRPressure = 0;
  unsigned VGPRPressure = 0;
  IsPending = false;
  if (DAG->isTrackingPressure()) {
    if (!GCNTrackers) {
      SGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32];
      VGPRPressure = Pressure[AMDGPU::RegisterPressureSets::VGPR_32];
    } else {
      GCNRPTracker *T = IsBottomUp
                            ? static_cast<GCNRPTracker *>(&UpwardTracker)
                            : static_cast<GCNRPTracker *>(&DownwardTracker);
      SGPRPressure = T->getPressure().getSGPRNum();
      VGPRPressure = T->getPressure().getArchVGPRNum();
    }
  }

  ReadyQueue &AQ = Zone.Available;
  for (SUnit *SU : AQ) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }

  if (!shouldCheckPending(Zone, SchedModel) || Zone.Pending.empty())
    return;

  // Also evaluate instructions in the pending queue.
  ReadyQueue &PQ = Zone.Pending;
  for (SUnit *SU : PQ) {
    SchedCandidate TryCand(ZonePolicy);
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure,
                  VGPRPressure, IsBottomUp);
    // ...
  }
}
SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode,
                                               bool &PickedPending) {
  // Schedule as far as possible in both directions to reduce latency.
  // ...
  bool BotPending = false;
  // ...
  assert(TryCand.SU == BotCand.SU &&
         "Last pick result should correspond to re-picking right now");
  // ...
  bool TopPending = false;
  // ...
  assert(TryCand.SU == TopCand.SU &&
         "Last pick result should correspond to re-picking right now");
  // ...
  // Pick the best candidate from BotCand and TopCand. A pending candidate
  // only wins when both picks were pending.
  PickedPending = BotPending && TopPending;
  if (BotPending || TopPending) {
    // ...
    Cand.setBest(TryCand);
    // ...
  }

  IsTopNode = Cand.AtTop;
  return Cand.SU;
}
SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) {
  if (DAG->top() == DAG->bottom()) {
    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
    return nullptr;
  }
  SUnit *SU;
  bool PickedPending = false;
  // ...
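  // Note on the invariant asserted above: once DAG->top() == DAG->bottom()
  // the region is fully scheduled, so the Available and Pending queues on
  // both boundaries must have been drained; a non-empty queue here would mean
  // a node was released but never picked.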
void GCNSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
  // If the picked node was only pending-ready, advance the zone's cycle to
  // its ready cycle before scheduling it.
  // ...
  unsigned ReadyCycle = IsTopNode ? SU->TopReadyCycle : SU->BotReadyCycle;
  SchedBoundary &Zone = IsTopNode ? Top : Bot;
  unsigned CurrentCycle = Zone.getCurrCycle();
  if (ReadyCycle > CurrentCycle)
    Zone.bumpCycle(ReadyCycle);
  // ...
  GenericScheduler::schedNode(SU, IsTopNode);
}
bool GCNMaxOccupancySchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                SchedCandidate &TryCand,
                                                SchedBoundary *Zone) const {
  // ...
  // Avoid exceeding the target's limit.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
                  RegExcess, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // We only compare a subset of features when comparing nodes between the
  // Top and Bottom boundary.
  bool SameBoundary = Zone != nullptr;
  // ...
  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  SameBoundary = Zone != nullptr;
  // ...
}

bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                          SchedCandidate &TryCand,
                                          SchedBoundary *Zone) const {
  // ...
  // Keep clustered nodes together to encourage downstream peephole
  // optimizations which may reduce resource requirements.
  unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID;
  unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID;
  bool CandIsClusterSucc =
      isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max critical pressure in the scheduled region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
                  TryCand, Cand, RegCritical, TRI, DAG->MF))
    return TryCand.Reason != NoCand;

  // Avoid increasing the max pressure of the entire region.
  if (DAG->isTrackingPressure() &&
      tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand,
                  Cand, RegMax, TRI, DAG->MF))
    return TryCand.Reason != NoCand;
  // ...
}

bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand,
                                                   SchedCandidate &TryCand,
                                                   SchedBoundary *Zone) const {
  // ...
  if (DAG->isTrackingPressure()) {
    // ...
  }

  // MaxMemoryClause-specific: prioritize cluster edges so memory instructions
  // can be claused together.
  bool CandIsClusterSucc =
      isTheSameCluster(Cand.AtTop ? TopClusterID : BotClusterID,
                       Cand.SU->ParentClusterIdx);
  bool TryCandIsClusterSucc =
      isTheSameCluster(TryCand.AtTop ? TopClusterID : BotClusterID,
                       TryCand.SU->ParentClusterIdx);
  if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand,
                 Cluster))
    return TryCand.Reason != NoCand;

  bool SameBoundary = Zone != nullptr;
  if (SameBoundary) {
    // ...
    if (TryMayLoad || CandMayLoad) {
      bool TryLongLatency =
          TryCand.SU->Latency > 10 * Cand.SU->Latency && !TryMayLoad;
      bool CandLongLatency =
          10 * TryCand.SU->Latency < Cand.SU->Latency && !CandMayLoad;
      if (tryGreater(Zone->isTop() ? TryLongLatency : CandLongLatency,
                     Zone->isTop() ? CandLongLatency : TryLongLatency, TryCand,
                     Cand, Stall))
        return TryCand.Reason != NoCand;
    }
    // ...
    if (DAG->isTrackingPressure() &&
        tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
                    TryCand, Cand, RegMax, TRI, DAG->MF))
      return TryCand.Reason != NoCand;
    // ...
    if (!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
      return TryCand.Reason != NoCand;
    // ...
  }
  return false;
}
GCNScheduleDAGMILive::GCNScheduleDAGMILive(
    MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S)
    : ScheduleDAGMILive(C, std::move(S)), ST(MF.getSubtarget<GCNSubtarget>()),
      MFI(*MF.getInfo<SIMachineFunctionInfo>()),
      StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy),
      RegionLiveOuts(this, /*IsLiveOut=*/true) {
  // ...
  LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
  if (RelaxedOcc) {
    MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
    if (MinOccupancy != StartingOccupancy)
      LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
                        << ".\n");
  }
}
std::unique_ptr<GCNSchedStage>
GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
  switch (SchedStageID) {
  case GCNSchedStageID::OccInitialSchedule:
    return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::RewriteMFMAForm:
    return std::make_unique<RewriteMFMAFormStage>(SchedStageID, *this);
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    return std::make_unique<ClusteredLowOccStage>(SchedStageID, *this);
  case GCNSchedStageID::PreRARematerialize:
    return std::make_unique<PreRARematStage>(SchedStageID, *this);
  case GCNSchedStageID::ILPInitialSchedule:
    return std::make_unique<ILPInitialScheduleStage>(SchedStageID, *this);
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    return std::make_unique<MemoryClauseInitialScheduleStage>(SchedStageID,
                                                              *this);
  }
  llvm_unreachable("Unknown SchedStageID.");
}
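// The stages created above run in the order recorded in the strategy's
// SchedStages list. Assuming all stages are enabled, the default
// max-occupancy pipeline is roughly:
//
//   OccInitialSchedule -> RewriteMFMAForm -> UnclusteredHighRPReschedule
//     -> ClusteredLowOccupancyReschedule -> PreRARematerialize
//
// (RewriteMFMAForm is gated behind
// -amdgpu-disable-rewrite-mfma-form-sched-stage, which currently defaults to
// disabled), while the ILP and memory-clause strategies start from their own
// initial schedule stage instead.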
GCNRegPressure
GCNScheduleDAGMILive::getRealRegPressure(unsigned RegionIdx) const {
  GCNDownwardRPTracker RPTracker(*LIS);
  RPTracker.advance(Regions[RegionIdx].first, Regions[RegionIdx].second,
                    &LiveIns[RegionIdx]);
  return RPTracker.moveMaxPressure();
}

static MachineInstr *
getLastMIForRegion(MachineBasicBlock::iterator RegionBegin,
                   MachineBasicBlock::iterator RegionEnd) {
  assert(RegionBegin != RegionEnd && "Region must not be empty");
  // ...
}
void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
                                                const MachineBasicBlock *MBB) {
  GCNDownwardRPTracker RPTracker(*LIS);

  // If the block has the only successor then live-ins of that successor are
  // live-outs of the current block. We can reuse the calculated live set if
  // the successor will be sent to scheduling past the current block.
  const MachineBasicBlock *OnlySucc = nullptr;
  if (MBB->succ_size() == 1) {
    auto *Candidate = *MBB->succ_begin();
    if (!Candidate->empty() && Candidate->pred_size() == 1) {
      SlotIndexes *Ind = LIS->getSlotIndexes();
      if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
        OnlySucc = Candidate;
    }
  }

  // Scheduler sends regions from the end of the block upwards.
  size_t CurRegion = RegionIdx;
  for (size_t E = Regions.size(); CurRegion != E; ++CurRegion)
    if (Regions[CurRegion].first->getParent() != MBB)
      break;
  --CurRegion;

  auto I = MBB->begin();
  auto LiveInIt = MBBLiveIns.find(MBB);
  auto &Rgn = Regions[CurRegion];
  auto *NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
  if (LiveInIt != MBBLiveIns.end()) {
    auto LiveIn = std::move(LiveInIt->second);
    RPTracker.reset(*MBB->begin(), &LiveIn);
    MBBLiveIns.erase(LiveInIt);
  } else {
    I = Rgn.first;
    auto LRS = BBLiveInMap.lookup(NonDbgMI);
#ifdef EXPENSIVE_CHECKS
    assert(isEqual(getLiveRegsBefore(*NonDbgMI, *LIS), LRS));
#endif
    RPTracker.reset(*I, &LRS);
  }

  for (;;) {
    I = RPTracker.getNext();

    if (Regions[CurRegion].first == I || NonDbgMI == I) {
      LiveIns[CurRegion] = RPTracker.getLiveRegs();
      RPTracker.clearMaxPressure();
    }

    if (Regions[CurRegion].second == I) {
      Pressure[CurRegion] = RPTracker.moveMaxPressure();
      if (CurRegion-- == RegionIdx)
        break;
      auto &Rgn = Regions[CurRegion];
      NonDbgMI = &*skipDebugInstructionsForward(Rgn.first, Rgn.second);
    }
    // ...
  }

  if (OnlySucc) {
    // ...
    MBBLiveIns[OnlySucc] = RPTracker.moveLiveRegs();
  }
}
DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveInMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionFirstMIs;
  RegionFirstMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionFirstMIs.push_back(
        &*skipDebugInstructionsForward(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionFirstMIs, /*After=*/false, *LIS);
}

DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet>
GCNScheduleDAGMILive::getRegionLiveOutMap() const {
  assert(!Regions.empty());
  std::vector<MachineInstr *> RegionLastMIs;
  RegionLastMIs.reserve(Regions.size());
  for (auto &[RegionBegin, RegionEnd] : reverse(Regions))
    RegionLastMIs.push_back(getLastMIForRegion(RegionBegin, RegionEnd));
  return getLiveRegMap(RegionLastMIs, /*After=*/true, *LIS);
}
void RegionPressureMap::buildLiveRegMap() {
  IdxToInstruction.clear();

  RegionLiveRegMap =
      IsLiveOut ? DAG->getRegionLiveOutMap() : DAG->getRegionLiveInMap();
  for (unsigned I = 0; I < DAG->Regions.size(); I++) {
    auto &[RegionBegin, RegionEnd] = DAG->Regions[I];
    // Skip empty regions.
    if (RegionBegin == RegionEnd)
      continue;
    MachineInstr *RegionKey =
        IsLiveOut ? getLastMIForRegion(RegionBegin, RegionEnd)
                  : &*skipDebugInstructionsForward(RegionBegin, RegionEnd);
    IdxToInstruction[I] = RegionKey;
  }
}
  LiveIns.resize(Regions.size());
  Pressure.resize(Regions.size());
  RegionsWithHighRP.resize(Regions.size());
  RegionsWithExcessRP.resize(Regions.size());
  RegionsWithIGLPInstrs.resize(Regions.size());
  RegionsWithHighRP.reset();
  RegionsWithExcessRP.reset();
  RegionsWithIGLPInstrs.reset();
void GCNScheduleDAGMILive::runSchedStages() {
  LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");

  if (!Regions.empty()) {
    BBLiveInMap = getRegionLiveInMap();
    if (GCNTrackers)
      RegionLiveOuts.buildLiveRegMap();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageBeforeScheduler) {
    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
  }
#endif

  GCNSchedStrategy &S = static_cast<GCNSchedStrategy &>(*SchedImpl);
  while (S.advanceStage()) {
    auto Stage = createSchedStage(S.getCurrentStage());
    if (!Stage->initGCNSchedStage())
      continue;

    for (auto Region : Regions) {
      RegionBegin = Region.first;
      RegionEnd = Region.second;
      // Setup for scheduling the region and check whether it should be
      // skipped.
      if (!Stage->initGCNRegion()) {
        Stage->advanceRegion();
        exitRegion();
        continue;
      }

      if (GCNTrackers) {
        GCNDownwardRPTracker *DownwardTracker = S.getDownwardTracker();
        GCNUpwardRPTracker *UpwardTracker = S.getUpwardTracker();
        GCNRPTracker::LiveRegSet *RegionLiveIns =
            &LiveIns[Stage->getRegionIdx()];

        reinterpret_cast<GCNRPTracker *>(DownwardTracker)
            ->reset(MRI, *RegionLiveIns);
        reinterpret_cast<GCNRPTracker *>(UpwardTracker)
            ->reset(MRI, RegionLiveOuts.getLiveRegsForRegionIdx(
                             Stage->getRegionIdx()));
      }

      ScheduleDAGMILive::schedule();
      Stage->finalizeGCNRegion();
    }

    Stage->finalizeGCNSchedStage();
  }

#ifdef DUMP_MAX_REG_PRESSURE
  if (PrintMaxRPRegUsageAfterScheduler) {
    dumpMaxRegPressure(MF, GCNRegPressure::VGPR, *LIS, MLI);
    dumpMaxRegPressure(MF, GCNRegPressure::SGPR, *LIS, MLI);
  }
#endif
}
#ifndef NDEBUG
raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
  switch (StageID) {
  case GCNSchedStageID::OccInitialSchedule:
    OS << "Max Occupancy Initial Schedule";
    break;
  case GCNSchedStageID::RewriteMFMAForm:
    OS << "Instruction Rewriting Reschedule";
    break;
  case GCNSchedStageID::UnclusteredHighRPReschedule:
    OS << "Unclustered High Register Pressure Reschedule";
    break;
  case GCNSchedStageID::ClusteredLowOccupancyReschedule:
    OS << "Clustered Low Occupancy Reschedule";
    break;
  case GCNSchedStageID::PreRARematerialize:
    OS << "Pre-RA Rematerialize";
    break;
  case GCNSchedStageID::ILPInitialSchedule:
    OS << "Max ILP Initial Schedule";
    break;
  case GCNSchedStageID::MemoryClauseInitialSchedule:
    OS << "Max memory clause Initial Schedule";
    break;
  }
  return OS;
}
#endif
void RewriteMFMAFormStage::findReachingDefs(
    MachineOperand &UseMO, LiveIntervals *LIS,
    SmallVectorImpl<SlotIndex> &ReachingDefIndexes) {
  // NB: parameter list approximated from the call sites below; the captured
  // listing elides it.
  // ...
  SmallVector<MachineBasicBlock *, 8> Worklist;
  // ...
  while (!Worklist.empty()) {
    // ...
    for (MachineBasicBlock *PredMBB : DefMBB->predecessors()) {
      if (Visited.insert(PredMBB).second)
        Worklist.push_back(PredMBB);
    }
  }
}
void RewriteMFMAFormStage::findReachingUses(
    MachineInstr *DefMI, LiveIntervals *LIS,
    SmallVectorImpl<MachineOperand *> &ReachingUses) {
  // NB: parameter list approximated from the call sites; the captured listing
  // elides it.
  for (MachineOperand &UseMO :
       /* non-debug uses of DefMI's def register */) {
    SmallVector<SlotIndex, 8> ReachingDefIndexes;
    findReachingDefs(UseMO, LIS, ReachingDefIndexes);
    // Keep the use if DefMI is among its reaching defs.
    if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) {
          // ...
        }))
      ReachingUses.push_back(&UseMO);
  }
}
bool RewriteMFMAFormStage::initGCNSchedStage() {
  // ...
  if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1)
    return false;

  RegionsWithExcessArchVGPR.resize(DAG.Regions.size());
  RegionsWithExcessArchVGPR.reset();
  // ... mark each region whose ArchVGPR pressure exceeds the limit:
      RegionsWithExcessArchVGPR[Region] = true;
  // ...
  if (RegionsWithExcessArchVGPR.none())
    return false;

  TII = ST.getInstrInfo();
  SRI = ST.getRegisterInfo();

  std::vector<std::pair<MachineInstr *, unsigned>> RewriteCands;
  // NB: map types below approximated from how they are iterated later.
  DenseMap<MachineBasicBlock *, DenseSet<Register>> CopyForUse;
  DenseSet<MachineInstr *> CopyForDef;
  if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef))
    return false;

  int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef);
  // ... only rewrite when the estimated cost is profitable:
  return rewrite(RewriteCands);
}
bool UnclusteredHighRPStage::initGCNSchedStage() {
  if (DisableUnclusterHighRP)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  if (DAG.RegionsWithHighRP.none() && DAG.RegionsWithExcessRP.none())
    return false;

  SavedMutations.swap(DAG.Mutations);
  // ...
  InitialOccupancy = DAG.MinOccupancy;
  TempTargetOccupancy = MFI.getMaxWavesPerEU() > DAG.MinOccupancy
                            ? InitialOccupancy + 1
                            : InitialOccupancy;
  IsAnyRegionScheduled = false;
  // Aggressively try to reduce register pressure in this stage; temporarily
  // bias the pressure limits.
  S.SGPRLimitBias = S.HighRPSGPRBias;
  S.VGPRLimitBias = S.HighRPVGPRBias;

  LLVM_DEBUG(
      dbgs()
      << "Retrying function scheduling without clustering. "
         "Aggressively try to reduce register pressure to achieve occupancy "
      << TempTargetOccupancy << ".\n");

  return true;
}
bool ClusteredLowOccStage::initGCNSchedStage() {
  if (DisableClusteredLowOccupancy)
    return false;

  if (!GCNSchedStage::initGCNSchedStage())
    return false;

  // Don't bother trying to improve ILP in lower RP regions if occupancy has
  // not been dropped. Clustered low occupancy rescheduling is best done when
  // there is a lot of latency to hide.
  if (DAG.StartingOccupancy <= DAG.MinOccupancy)
    return false;

  LLVM_DEBUG(
      dbgs() << "Retrying function scheduling with lowest recorded occupancy "
             << DAG.MinOccupancy << ".\n");
  return true;
}
/// Allows to easily filter for this stage's debug output.
#define REMAT_PREFIX "[PreRARemat] "
#define REMAT_DEBUG(X) LLVM_DEBUG(dbgs() << REMAT_PREFIX; X;)
bool PreRARematStage::initGCNSchedStage() {
  // ...
  // Before performing any IR modification, record the parent region of each
  // MI and the parent MBB of each region.
  const unsigned NumRegions = DAG.Regions.size();
  RegionBB.reserve(NumRegions);
  for (unsigned I = 0; I < NumRegions; ++I) {
    RegionBoundaries Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI)
      MIRegion.insert({&*MI, I});
    RegionBB.push_back(Region.first->getParent());
  }

  if (!canIncreaseOccupancyOrReduceSpill())
    return false;

  // Rematerialize identified instructions and update the scheduler's state.
  rematerialize();
  if (GCNTrackers)
    DAG.RegionLiveOuts.buildLiveRegMap();
  REMAT_DEBUG({
    dbgs() << "Retrying function scheduling with new min. occupancy of "
           << AchievedOcc << " from rematerializing (original was "
           << DAG.MinOccupancy;
    if (TargetOcc)
      dbgs() << ", target was " << *TargetOcc;
    dbgs() << ")\n";
  });

  if (AchievedOcc > DAG.MinOccupancy) {
    DAG.MinOccupancy = AchievedOcc;
    MFI.increaseOccupancy(MF, DAG.MinOccupancy);
  }
  return true;
}
void UnclusteredHighRPStage::finalizeGCNSchedStage() {
  SavedMutations.swap(DAG.Mutations);
  S.SGPRLimitBias = S.VGPRLimitBias = 0;
  if (DAG.MinOccupancy > InitialOccupancy) {
    assert(IsAnyRegionScheduled);
    LLVM_DEBUG(dbgs() << StageID
                      << " stage successfully increased occupancy to "
                      << DAG.MinOccupancy << '\n');
  } else if (!IsAnyRegionScheduled) {
    assert(DAG.MinOccupancy == InitialOccupancy);
    LLVM_DEBUG(dbgs() << StageID
                      << ": No regions scheduled, min occupancy stays at "
                      << DAG.MinOccupancy << ", MFI occupancy stays at "
                      << MFI.getOccupancy() << ".\n");
  }

  GCNSchedStage::finalizeGCNSchedStage();
}
bool GCNSchedStage::initGCNRegion() {
  // ...
  unsigned NumRegionInstrs = std::distance(DAG.begin(), DAG.end());
  // ...
  // Skip empty scheduling regions (0 or 1 schedulable instructions).
  if (DAG.begin() == DAG.end() || DAG.begin() == std::prev(DAG.end()))
    return false;

  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
  LLVM_DEBUG(dbgs() << MF.getName() << ":" << printMBBReference(*CurrentMBB)
                    << "\n  From: " << *DAG.begin() << "    To: ";
             if (DAG.RegionEnd != CurrentMBB->end()) dbgs() << *DAG.RegionEnd;
             else dbgs() << "End";
             dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

  // Save the original instruction order before scheduling for a possible
  // revert.
  Unsched.clear();
  Unsched.reserve(NumRegionInstrs);
  for (auto &I : DAG) {
    Unsched.push_back(&I);
    // ...
  }

  PressureBefore = DAG.Pressure[RegionIdx];

  LLVM_DEBUG(
      dbgs() << "Pressure before scheduling:\nRegion live-ins:"
             << print(DAG.LiveIns[RegionIdx], DAG.MRI)
             << "Region live-in pressure: "
             << print(llvm::getRegPressure(DAG.MRI, DAG.LiveIns[RegionIdx]))
             << "Region register pressure: " << print(PressureBefore));

  S.HasHighPressure = false;
  // ...
  return true;
}
bool UnclusteredHighRPStage::initGCNRegion() {
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...
  unsigned CurrentTargetOccupancy =
      IsAnyRegionScheduled ? DAG.MinOccupancy : TempTargetOccupancy;
  bool IsSchedulingThisRegion =
      // ... (the region has high/excess RP) &&
      (CurrentTargetOccupancy <= InitialOccupancy ||
       DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) !=
           CurrentTargetOccupancy);
  // ...
  // Commit to the raised occupancy target when the first region of this
  // stage actually gets scheduled.
  if (!IsAnyRegionScheduled && IsSchedulingThisRegion) {
    IsAnyRegionScheduled = true;
    if (MFI.getMaxWavesPerEU() > DAG.MinOccupancy) {
      DAG.MinOccupancy = TempTargetOccupancy;
      MFI.increaseOccupancy(MF, TempTargetOccupancy);
    }
  }
  return IsSchedulingThisRegion;
}
  // ...
  if (S.HasHighPressure)
    DAG.RegionsWithHighRP[RegionIdx] = true;
  // ...
void GCNSchedStage::checkScheduling() {
  // Check the results of scheduling.
  PressureAfter = DAG.getRealRegPressure(RegionIdx);
  unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
  // ...

  unsigned TargetOccupancy = std::min(
      S.getTargetOccupancy(), ST.getOccupancyWithWorkGroupSizes(MF).second);
  unsigned WavesAfter = std::min(
      TargetOccupancy, PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize));
  unsigned WavesBefore = std::min(
      TargetOccupancy, PressureBefore.getOccupancy(ST, DynamicVGPRBlockSize));
  LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
                    << ", after " << WavesAfter << ".\n");

  // We may not be able to keep the current target occupancy because of the
  // just scheduled region's pressure.
  unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);

  // Allow memory bound functions to drop to the minimum allowed occupancy if
  // not limited by an attribute.
  if (WavesAfter < WavesBefore && WavesAfter < DAG.MinOccupancy &&
      WavesAfter >= MFI.getMinAllowedOccupancy()) {
    LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
                      << MFI.getMinAllowedOccupancy() << " waves\n");
    NewOccupancy = WavesAfter;
  }

  if (NewOccupancy < DAG.MinOccupancy) {
    DAG.MinOccupancy = NewOccupancy;
    MFI.limitOccupancy(DAG.MinOccupancy);
    LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                      << DAG.MinOccupancy << ".\n");
  }

  unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
  unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
  // ...
}
unsigned
GCNSchedStage::computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                      DenseMap<unsigned, unsigned> &ReadyCycles,
                                      const TargetSchedModel &SM) {
  unsigned ReadyCycle = CurrCycle;
  for (auto &D : SU.Preds) {
    if (D.isAssignedRegDep()) {
      MachineInstr *DefMI = D.getSUnit()->getInstr();
      unsigned Latency = SM.computeInstrLatency(DefMI);
      unsigned DefReady = ReadyCycles[DAG.getSUnit(DefMI)->NodeNum];
      ReadyCycle = std::max(ReadyCycle, DefReady + Latency);
    }
  }
  ReadyCycles[SU.NodeNum] = ReadyCycle;
  return ReadyCycle;
}
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction\n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ] : " << *I.first << "\n";
    IPrev = I.second;
  }
}
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const std::vector<SUnit> &InputSchedule) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &SU : InputSchedule) {
    unsigned ReadyCycle =
        computeSUnitReadyCycle(SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU.getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(printScheduleModel(ReadyCyclesSorted));
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
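// Worked example with illustrative numbers: for ready cycles 0, 1, 5, 6 the
// loop above advances CurrCycle to 1, 2, 6, 7 and accumulates SumBubbles =
// (5 - 2) = 3, i.e. three stall cycles in a seven-cycle schedule. The metric
// is then reported as scaled bubbles per schedule length; with the upstream
// ScaleFactor of 100 this comes out to 3 * 100 / 7 = 42, so lower is better.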
ScheduleMetrics
GCNSchedStage::getScheduleMetrics(const GCNScheduleDAGMILive &DAG) {
#ifndef NDEBUG
  std::set<std::pair<MachineInstr *, unsigned>, EarlierIssuingCycle>
      ReadyCyclesSorted;
#endif
  const TargetSchedModel &SM = ST.getInstrInfo()->getSchedModel();
  unsigned SumBubbles = 0;
  DenseMap<unsigned, unsigned> ReadyCycles;
  unsigned CurrCycle = 0;
  for (auto &MI : DAG) {
    SUnit *SU = DAG.getSUnit(&MI);
    if (!SU)
      continue;
    unsigned ReadyCycle =
        computeSUnitReadyCycle(*SU, CurrCycle, ReadyCycles, SM);
    SumBubbles += ReadyCycle - CurrCycle;
#ifndef NDEBUG
    ReadyCyclesSorted.insert(std::make_pair(SU->getInstr(), ReadyCycle));
#endif
    CurrCycle = ++ReadyCycle;
  }
#ifndef NDEBUG
  LLVM_DEBUG(printScheduleModel(ReadyCyclesSorted));
#endif

  return ScheduleMetrics(CurrCycle, SumBubbles);
}
bool GCNSchedStage::shouldRevertScheduling(unsigned WavesAfter) {
  if (WavesAfter < DAG.MinOccupancy)
    return true;

  // For dynamic VGPR mode, we don't want to waste any VGPR blocks.
  if (DAG.MFI.isDynamicVGPREnabled()) {
    unsigned BlocksBefore = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, PressureBefore.getVGPRNum(false),
        DAG.MFI.getDynamicVGPRBlockSize());
    unsigned BlocksAfter = AMDGPU::IsaInfo::getAllocatedNumVGPRBlocks(
        &ST, PressureAfter.getVGPRNum(false),
        DAG.MFI.getDynamicVGPRBlockSize());
    if (BlocksAfter > BlocksBefore)
      return true;
  }

  return false;
}
bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
  // ...
  LLVM_DEBUG(
      dbgs()
      << "\n\t *** In shouldRevertScheduling ***\n"
      << " *********** BEFORE UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MBefore = getScheduleMetrics(DAG.SUnits);
  LLVM_DEBUG(
      dbgs() << "\n *********** AFTER UnclusteredHighRPStage ***********\n");
  ScheduleMetrics MAfter = getScheduleMetrics(DAG);

  unsigned OldMetric = MBefore.getMetric();
  unsigned NewMetric = MAfter.getMetric();
  unsigned WavesBefore = std::min(
      S.getTargetOccupancy(),
      PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()));
  unsigned Profit =
      ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
       ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
       NewMetric) /
      ScheduleMetrics::ScaleFactor;
  LLVM_DEBUG(dbgs() << "\tMetric before " << MBefore << "\tMetric after "
                    << MAfter << "Profit: " << Profit << "\n");
  return ScheduleMetrics::ScaleFactor > Profit;
}
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
  if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
      !PressureAfter.less(MF, PressureBefore)) {
    LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
    return true;
  }

  return false;
}
void GCNSchedStage::revertScheduling() {
  // ...
  LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n");
  DAG.RegionEnd = DAG.RegionBegin;
  int SkippedDebugInstr = 0;
  for (MachineInstr *MI : Unsched) {
    if (MI->isDebugInstr()) {
      ++SkippedDebugInstr;
      continue;
    }

    auto MII = MI->getIterator();
    if (MII != DAG.RegionEnd) {
      // ... (splice MI back to its original position; NonDebugReordered
      // records whether any non-debug instruction actually changed place)
      if (NonDebugReordered)
        DAG.LIS->handleMove(*MI, true);
    }

    // Reset read-undef flags and update them later.
    for (auto &Op : MI->all_defs())
      Op.setIsUndef(false);
    RegisterOperands RegOpers;
    RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
    if (!MI->isDebugInstr()) {
      if (DAG.ShouldTrackLaneMasks) {
        // Adjust liveness and add missing dead+read-undef flags.
        SlotIndex SlotIdx = DAG.LIS->getInstructionIndex(*MI).getRegSlot();
        RegOpers.adjustLaneLiveness(*DAG.LIS, DAG.MRI, SlotIdx, MI);
      } else {
        // Adjust for missing dead-def flags.
        RegOpers.detectDeadDefs(*MI, *DAG.LIS);
      }
    }
    DAG.RegionEnd = MI->getIterator();
    ++DAG.RegionEnd;
  }

  // After reverting the schedule, debug instructions end up at the end of
  // the block; move RegionEnd past them to the actual end of the region.
  while (SkippedDebugInstr-- > 0)
    ++DAG.RegionEnd;

  // If Unsched.front() is a debug instruction, the region shrank since all
  // debug instructions were moved to the end of the block. Find the first
  // instruction that is not a debug instruction.
  DAG.RegionBegin = Unsched.front()->getIterator();
  if (DAG.RegionBegin->isDebugInstr()) {
    for (MachineInstr *MI : Unsched) {
      if (MI->isDebugInstr())
        continue;
      DAG.RegionBegin = MI->getIterator();
      break;
    }
  }

  // Then move the debug instructions back into their correct place.
  DAG.placeDebugValues();
}
bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *MI) const {
  // NB: body approximated from context; the captured listing elides it.
  return TII->isMAI(*MI) &&
         AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1;
}
bool RewriteMFMAFormStage::initHeuristics(
    std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
    DenseMap<MachineBasicBlock *, DenseSet<Register>> &CopyForUse,
    DenseSet<MachineInstr *> &CopyForDef) {
  // NB: parameter types approximated from how they are used here and in
  // getRewriteCost(); the captured listing elides them.
  // For each instruction in each region with excess ArchVGPR pressure:
      if (!isRewriteCandidate(&MI))
        continue;
      int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode());
      assert(ReplacementOp != -1);
      RewriteCands.push_back({&MI, MI.getOpcode()});
      MI.setDesc(TII->get(ReplacementOp));

      MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2);
      if (Src2->isReg()) {
        SmallVector<SlotIndex, 8> Src2ReachingDefs;
        findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
        // Non-MAI reaching defs of src2 will need a copy.
        for (SlotIndex RDIdx : Src2ReachingDefs) {
          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx);
          if (!TII->isMAI(*RD))
            CopyForDef.insert(RD);
        }
      }

      MachineOperand &Dst = MI.getOperand(0);
      SmallVector<MachineOperand *, 8> DstReachingUses;
      findReachingUses(&MI, DAG.LIS, DstReachingUses);
      for (MachineOperand *RUOp : DstReachingUses) {
        if (TII->isMAI(*RUOp->getParent()))
          continue;
        // A non-MAI use needs a copy back to VGPR form in its block.
        CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg());

        SmallVector<SlotIndex, 8> DstUsesReachingDefs;
        findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
        for (SlotIndex RDIndex : DstUsesReachingDefs) {
          MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
          if (TII->isMAI(*RD))
            continue;
          // ...
        }
      }

      // Tentatively switch the MFMA's dst (and register src2) to the
      // equivalent AGPR class for cost analysis.
      const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg());
      const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC);
      DAG.MRI.setRegClass(Dst.getReg(), AGPRRC);
      if (Src2->isReg())
        DAG.MRI.setRegClass(Src2->getReg(), AGPRRC);
  // ...
  return true;
}
int64_t RewriteMFMAFormStage::getRewriteCost(
    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
    const DenseMap<MachineBasicBlock *, DenseSet<Register>> &CopyForUse,
    const DenseSet<MachineInstr *> &CopyForDef) {
  MachineBlockFrequencyInfo *MBFI = DAG.MBFI;
  uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency();
  int64_t BestSpillCost = 0;
  int64_t Cost = 0;

  std::pair<unsigned, unsigned> MaxVectorRegs =
      ST.getMaxNumVectorRegs(MF.getFunction());
  unsigned ArchVGPRThreshold = MaxVectorRegs.first;
  unsigned AGPRThreshold = MaxVectorRegs.second;
  unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF);

  for (unsigned Region = 0; Region < DAG.Regions.size(); ++Region) {
    if (!RegionsWithExcessArchVGPR[Region])
      continue;

    // Spill cost estimates before and after the tentative rewrite; both
    // helpers take (MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold).
    // NB: callee names elided in the captured fragment.
    unsigned SpillCostBefore = /* spill cost of the original pressure */ (
        MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);
    // ...
    unsigned SpillCostAfter = /* spill cost of the rewritten pressure */ (
        MF, ArchVGPRThreshold, AGPRThreshold, CombinedThreshold);

    uint64_t BlockFreq =
        MBFI->getBlockFreq(DAG.Regions[Region].first->getParent())
            .getFrequency();
    bool RelativeFreqIsDenom = EntryFreq > BlockFreq;
    uint64_t RelativeFreq = EntryFreq && BlockFreq
                                ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq
                                                       : BlockFreq / EntryFreq)
                                : 1;

    int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2;
    // Scale the spill cost by the region's relative block frequency.
    if (RelativeFreqIsDenom)
      SpillCost /= (int64_t)RelativeFreq;
    else
      SpillCost *= (int64_t)RelativeFreq;

    // Track the most beneficial (most negative) spill cost delta.
    if (SpillCost < BestSpillCost)
      BestSpillCost = SpillCost;
  }

  Cost = BestSpillCost;

  // Account for the cross-class copies the rewrite requires.
  unsigned CopyCost = 0;
  for (MachineInstr *DefMI : CopyForDef) {
    Register DefReg = DefMI->getOperand(0).getReg();
    const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg);
    CopyCost += RC->getCopyCost();
    // ...
  }
  for (auto &[UseBlock, UseRegs] : CopyForUse) {
    for (Register UseReg : UseRegs) {
      const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg);
      CopyCost += RC->getCopyCost();
      // ...
    }
  }

  // Undo the tentative rewrite performed during cost analysis.
  for (auto &[MI, OriginalOpcode] : RewriteCands) {
    const TargetRegisterClass *AGPRRC =
        DAG.MRI.getRegClass(MI->getOperand(0).getReg());
    const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC);
    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
    if (Src2->isReg())
      DAG.MRI.setRegClass(Src2->getReg(), VGPRRC);
    DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC);
    MI->setDesc(TII->get(OriginalOpcode));
  }

  return Cost + CopyCost;
}
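// Worked example with illustrative numbers: rewriting away one spill
// (SpillCostAfter - SpillCostBefore = -1) in a loop body whose BlockFreq is
// 8x the entry frequency yields SpillCost = -1 * 2 * 8 = -16, while the same
// saving in a block running an eighth as often as entry is divided down to
// -2 / 8 = 0 under integer division. Hot regions therefore dominate
// BestSpillCost, and the caller in initGCNSchedStage() weighs the returned
// Cost + CopyCost to decide whether the rewrite is worthwhile.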
bool RewriteMFMAFormStage::rewrite(
    const std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands) {
  // Record the first and last MI of each region so region boundaries can be
  // fixed up as copies are inserted.
  DenseMap<MachineInstr *, unsigned> FirstMIToRegion;
  DenseMap<MachineInstr *, unsigned> LastMIToRegion;
  // ...
    if (Entry.second != Entry.first->getParent()->end())
      // ... record the region's last MI ...
  // ...

  // The registers whose class will be rewritten from VGPR to AGPR.
  DenseSet<Register> RewriteRegs;
  // Map from an original register to its replacement register.
  DenseMap<Register, Register> RedefMap;
  // Operands that must later be rewritten to use the replacement register.
  DenseMap<Register, DenseSet<MachineOperand *>> ReplaceMap;
  // Reaching defs which already have a copy inserted.
  DenseMap<Register, SmallPtrSet<MachineInstr *, 8>> ReachingDefCopyMap;
  // Per-block tracking of reaching uses which still need copies.
  DenseMap<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>
      ReachingUseTracker;

  for (auto &[MI, OriginalOpcode] : RewriteCands) {
    int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode());
    if (ReplacementOp == -1)
      continue;
    MI->setDesc(TII->get(ReplacementOp));

    MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
    if (Src2->isReg()) {
      Register Src2Reg = Src2->getReg();
      SmallVector<SlotIndex, 8> Src2ReachingDefs;
      findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs);
      SmallSetVector<MachineInstr *, 8> Src2DefsReplace;

      for (SlotIndex RDIndex : Src2ReachingDefs) {
        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
        if (TII->isMAI(*RD))
          continue;
        Src2DefsReplace.insert(RD);
      }

      if (!Src2DefsReplace.empty()) {
        Register MappedReg;
        auto RI = RedefMap.find(Src2Reg);
        if (RI != RedefMap.end()) {
          MappedReg = RI->second;
        } else {
          const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg);
          const TargetRegisterClass *VGPRRC =
              SRI->getEquivalentVGPRClass(Src2RC);
          MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
          RedefMap[Src2Reg] = MappedReg;
        }

        // Insert a copy after each non-MAI reaching def (once per def).
        for (MachineInstr *RD : Src2DefsReplace) {
          if (ReachingDefCopyMap[Src2Reg].insert(RD).second) {
            MachineInstrBuilder VGPRCopy =
                // ... BuildMI of a COPY right after RD ...
                    .addDef(MappedReg, {}, 0)
                    .addUse(Src2Reg, {}, 0);
            DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
            // If RD ended a region, the copy is the new region end.
            unsigned UpdateRegion = LastMIToRegion[RD];
            DAG.Regions[UpdateRegion].second = VGPRCopy;
            LastMIToRegion.erase(RD);
          }
        }
        // ...
      }
      RewriteRegs.insert(Src2Reg);
    }

    MachineOperand *Dst = &MI->getOperand(0);
    Register DstReg = Dst->getReg();
    SmallVector<MachineOperand *, 8> DstReachingUseCopies;
    SmallVector<MachineInstr *, 8> DstUseDefsReplace;
    SmallVector<MachineOperand *, 8> DstReachingUses;
    findReachingUses(MI, DAG.LIS, DstReachingUses);

    for (MachineOperand *RUOp : DstReachingUses) {
      if (TII->isMAI(*RUOp->getParent()))
        continue;
      if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end())
        DstReachingUseCopies.push_back(RUOp);

      SmallVector<SlotIndex, 8> DstUsesReachingDefs;
      findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs);
      for (SlotIndex RDIndex : DstUsesReachingDefs) {
        MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex);
        if (TII->isMAI(*RD))
          continue;
        if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end())
          DstUseDefsReplace.push_back(RD);
      }
    }

    if (!DstUseDefsReplace.empty()) {
      Register MappedReg;
      auto RI = RedefMap.find(DstReg);
      if (RI != RedefMap.end()) {
        MappedReg = RI->second;
      } else {
        const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
        const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
        MappedReg = DAG.MRI.createVirtualRegister(VGPRRC);
        RedefMap[DstReg] = MappedReg;
      }

      for (MachineInstr *RD : DstUseDefsReplace) {
        if (ReachingDefCopyMap[DstReg].insert(RD).second) {
          MachineInstrBuilder VGPRCopy =
              // ... BuildMI of a COPY right after RD ...
                  .addDef(MappedReg, {}, 0)
                  .addUse(DstReg, {}, 0);
          DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
          auto LMI = LastMIToRegion.find(RD);
          if (LMI != LastMIToRegion.end()) {
            unsigned UpdateRegion = LMI->second;
            DAG.Regions[UpdateRegion].second = VGPRCopy;
            LastMIToRegion.erase(RD);
          }
        }
      }
    }

    DenseSet<MachineOperand *> &DstRegSet = ReplaceMap[DstReg];
    for (MachineOperand *RU : DstReachingUseCopies) {
      MachineBasicBlock *RUBlock = RU->getParent()->getParent();
      // Defer cross-block uses; they are handled per-block below.
      if (RUBlock != MI->getParent()) {
        // ...
        continue;
      }
      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg);
      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
      MachineInstr *UseInst = RU->getParent();
      MachineInstrBuilder VGPRCopy =
          // ... BuildMI of a COPY right before UseInst ...
              .addDef(NewUseReg, {}, 0)
              .addUse(DstReg, {}, 0);
      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
      RU->setReg(NewUseReg);
    }
    RewriteRegs.insert(DstReg);
  }

  // Insert one copy per (block, register) for the deferred reaching uses.
  using RUBType =
      std::pair<unsigned, DenseMap<Register, SmallPtrSet<MachineOperand *, 8>>>;
  for (RUBType RUBlockEntry : ReachingUseTracker) {
    using RUDType = std::pair<Register, SmallPtrSet<MachineOperand *, 8>>;
    for (RUDType RUDst : RUBlockEntry.second) {
      // Place the copy before the earliest user in the block.
      MachineOperand *OpBegin = *RUDst.second.begin();
      SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent());
      for (MachineOperand *User : RUDst.second) {
        SlotIndex NewInstPt =
            DAG.LIS->getInstructionIndex(*User->getParent());
        if (SlotIndex::isEarlierInstr(NewInstPt, InstPt))
          InstPt = NewInstPt;
      }
      const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first);
      const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC);
      Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC);
      MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt);
      MachineInstrBuilder VGPRCopy =
          // ... BuildMI of a COPY right before UseInst ...
              .addDef(NewUseReg, {}, 0)
              .addUse(RUDst.first, {}, 0);
      DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy);
      // If the earliest user began a region, the copy is the new region start.
      auto FI = FirstMIToRegion.find(UseInst);
      if (FI != FirstMIToRegion.end()) {
        unsigned UpdateRegion = FI->second;
        DAG.Regions[UpdateRegion].first = VGPRCopy;
        FirstMIToRegion.erase(UseInst);
      }
      for (MachineOperand *User : RUDst.second)
        User->setReg(NewUseReg);
    }
  }

  // Point the deferred operands at the replacement registers.
  for (std::pair<Register, Register> NewDef : RedefMap) {
    Register OldReg = NewDef.first;
    Register NewReg = NewDef.second;
    for (MachineOperand *ReplaceOp : ReplaceMap[OldReg])
      ReplaceOp->setReg(NewReg);
  }

  // Finally, switch the rewritten registers to the AGPR class.
  for (Register RewriteReg : RewriteRegs) {
    Register RegToRewrite = RewriteReg;
    auto RI = RedefMap.find(RewriteReg);
    if (RI != RedefMap.end())
      RegToRewrite = RI->second;
    const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite);
    const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC);
    DAG.MRI.setRegClass(RegToRewrite, AGPRRC);
  }

  DAG.LIS->reanalyze(DAG.MF);

  // Recompute the region live-ins after rewriting.
  RegionPressureMap LiveInUpdater(&DAG, /*IsLiveOut=*/false);
  LiveInUpdater.buildLiveRegMap();
  for (unsigned Region = 0; Region < DAG.Regions.size(); ++Region)
    DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region);

  return true;
}
bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() {
  const Function &F = MF.getFunction();

  // Maps optimizable regions (i.e., regions at minimum and register-limited
  // occupancy, or regions with spilling) to the target RP we would like to
  // reach.
  DenseMap<unsigned, GCNRPTarget> OptRegions;
  unsigned MaxSGPRs = ST.getMaxNumSGPRs(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(F);
  bool HasVectorRegisterExcess;

  auto ResetTargetRegions = [&]() {
    OptRegions.clear();
    HasVectorRegisterExcess = false;
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      const GCNRegPressure &RP = DAG.Pressure[I];
      GCNRPTarget Target(MaxSGPRs, MaxVGPRs, MF, RP);
      if (!Target.satisfied())
        OptRegions.insert({I, Target});
      HasVectorRegisterExcess |= Target.hasVectorRegisterExcess();
    }
  };

  ResetTargetRegions();
  if (HasVectorRegisterExcess || DAG.MinOccupancy >= MFI.getMaxWavesPerEU()) {
    // In addition to register usage being above addressable limits, occupancy
    // below the minimum is considered like "spilling" as well.
    TargetOcc = std::nullopt;
  } else {
    // There is no spilling and room to improve occupancy; set up "increased
    // occupancy targets" for all regions.
    TargetOcc = DAG.MinOccupancy + 1;
    unsigned VGPRBlockSize = MFI.getDynamicVGPRBlockSize();
    MaxSGPRs = ST.getMaxNumSGPRs(*TargetOcc, false);
    MaxVGPRs = ST.getMaxNumVGPRs(*TargetOcc, VGPRBlockSize);
    ResetTargetRegions();
  }

  REMAT_DEBUG({
    dbgs() << "Analyzing ";
    MF.getFunction().printAsOperand(dbgs(), false);
    dbgs() << ": ";
    if (OptRegions.empty()) {
      dbgs() << "no objective to achieve, occupancy is maximal at "
             << MFI.getMaxWavesPerEU();
    } else if (!TargetOcc) {
      dbgs() << "reduce spilling (minimum target occupancy is "
             << MFI.getMinWavesPerEU() << ')';
    } else {
      dbgs() << "increase occupancy from " << DAG.MinOccupancy << " to "
             << *TargetOcc;
    }
    dbgs() << '\n';
    for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
      if (auto OptIt = OptRegions.find(I); OptIt != OptRegions.end()) {
        dbgs() << REMAT_PREFIX << "  [" << I << "] " << OptIt->getSecond()
               << '\n';
      }
    }
  });
  if (OptRegions.empty())
    return false;

  // Tries to reduce RP in an optimizable region; returns whether all
  // objectives have been satisfied.
  auto ReduceRPInRegion = [&](auto OptIt, Register Reg, LaneBitmask Mask,
                              bool &Progress) -> bool {
    GCNRPTarget &Target = OptIt->getSecond();
    if (!Target.isSaveBeneficial(Reg))
      return false;
    Progress = true;
    Target.saveReg(Reg, Mask, DAG.MRI);
    if (Target.satisfied())
      OptRegions.erase(OptIt->getFirst());
    return OptRegions.empty();
  };

  // We need up-to-date live-out info to query live-out register masks in
  // regions containing rematerializable instructions.
  DAG.RegionLiveOuts.buildLiveRegMap();

  // Cache set of registers that are going to be rematerialized.
  DenseSet<unsigned> RematRegs;

  // Identify rematerializable instructions in the function.
  for (unsigned I = 0, E = DAG.Regions.size(); I != E; ++I) {
    auto Region = DAG.Regions[I];
    for (auto MI = Region.first; MI != Region.second; ++MI) {
      MachineInstr &DefMI = *MI;
      if (!isReMaterializable(DefMI))
        continue;

      // Only rematerialize virtual registers with a single definition and a
      // single non-debug user in a different region.
      Register Reg = DefMI.getOperand(0).getReg();
      // ...
      MachineInstr *UseMI = DAG.MRI.getOneNonDBGUser(Reg);
      if (!UseMI)
        continue;
      auto UseRegion = MIRegion.find(UseMI);
      if (UseRegion != MIRegion.end() && UseRegion->second == I)
        continue;

      // Do not rematerialize an instruction if it uses or is used by an
      // instruction that we have designated for rematerialization.
      if (Rematerializations.contains(UseMI) ||
          any_of(DefMI.operands(), [&RematRegs](MachineOperand &MO) {
            return MO.isReg() && RematRegs.contains(MO.getReg());
          }))
        continue;

      // Do not rematerialize an instruction if it uses registers that aren't
      // available at its use; this ensures no live range is extended.
      SlotIndex UseIdx = DAG.LIS->getInstructionIndex(*UseMI).getRegSlot(true);
      // ...

      RematInstruction &Remat =
          Rematerializations.try_emplace(&DefMI, UseMI).first->second;

      bool RematUseful = false;
      if (auto It = OptRegions.find(I); It != OptRegions.end()) {
        // Moving the instruction out of its defining region should reduce RP
        // there.
        LaneBitmask Mask = DAG.RegionLiveOuts.getLiveRegsForRegionIdx(I)[Reg];
        if (ReduceRPInRegion(It, Reg, Mask, RematUseful))
          return true;
      }

      for (unsigned LIRegion = 0; LIRegion != E; ++LIRegion) {
        // Collect regions in which the register is a live-in (and may be
        // live-through).
        auto It = DAG.LiveIns[LIRegion].find(Reg);
        if (It == DAG.LiveIns[LIRegion].end() || It->second.none())
          continue;
        Remat.LiveInRegions.insert(LIRegion);

        // Account for the RP reduction in optimizable regions in which the
        // register is a live-in.
        if (auto It = OptRegions.find(LIRegion); It != OptRegions.end()) {
          if (ReduceRPInRegion(It, Reg, DAG.LiveIns[LIRegion][Reg],
                               RematUseful))
            return true;
        }
      }

      if (!RematUseful) {
        Rematerializations.pop_back();
        REMAT_DEBUG(dbgs() << " No impact, not rematerializing instruction\n");
      } else {
        RematRegs.insert(Reg);
      }
    }
  }

  if (TargetOcc) {
    // We were trying to increase occupancy but could not satisfy every
    // region's target; drop the whole plan.
    // ...
    Rematerializations.clear();
    // ...
  }
  return !Rematerializations.empty();
}
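// In short: the stage first establishes an objective (reduce spilling, or
// reach DAG.MinOccupancy + 1 when nothing spills), then walks every region
// collecting single-use rematerializable defs whose sinking provably shrinks
// a live range feeding one of the still-unsatisfied target regions; if no
// instruction helps, it bails out before mutating the function.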
void PreRARematStage::rematerialize() {
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
  // ...
  // Collect regions whose RP changes in an unpredictable way; their RP must
  // be fully recomputed after all rematerializations.
  DenseSet<unsigned> RecomputeRP;

  // Rematerialize all instructions.
  for (auto &[DefMI, Remat] : Rematerializations) {
    MachineBasicBlock::iterator InsertPos(Remat.UseMI);
    Register Reg = DefMI->getOperand(0).getReg();
    unsigned DefRegion = MIRegion.at(DefMI);

    // Rematerialize DefMI to its use block.
    TII->reMaterialize(*InsertPos->getParent(), InsertPos, Reg,
                       AMDGPU::NoSubRegister, *DefMI);
    Remat.RematMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*Remat.RematMI);

    // Update region boundaries in regions we sank from (remove the defining
    // MI) and to (insert the rematerialized MI). Only then erase the original.
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], DefMI, nullptr);
    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], InsertPos,
                                 Remat.RematMI);
    }
    DAG.LIS->RemoveMachineInstrFromMaps(*DefMI);
    DefMI->eraseFromParent();

    // Collect all regions impacted by the rematerialization and update their
    // live-in/RP information.
    for (unsigned I : Remat.LiveInRegions) {
      ImpactedRegions.insert({I, DAG.Pressure[I]});
      GCNRPTracker::LiveRegSet &RegionLiveIns = DAG.LiveIns[I];

#ifdef EXPENSIVE_CHECKS
      // All uses are known to be available/live at the remat point, so they
      // should already be live-in to the region.
      for (MachineOperand &MO : DefMI->operands()) {
        if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
          continue;
        Register UseReg = MO.getReg();
        if (!UseReg.isVirtual())
          continue;

        LiveInterval &LI = DAG.LIS->getInterval(UseReg);
        LaneBitmask LM = DAG.MRI.getMaxLaneMaskForVReg(MO.getReg());
        // ...
        LaneBitmask LiveInMask = RegionLiveIns.at(UseReg);
        LaneBitmask UncoveredLanes = LM & ~(LiveInMask & LM);
        // If this register has lanes not covered by the live-ins, make sure
        // they do not map to any subrange.
        if (UncoveredLanes.any()) {
          assert(LI.hasSubRanges());
          for (LiveInterval::SubRange &SR : LI.subranges())
            assert((SR.LaneMask & UncoveredLanes).none());
        }
      }
#endif

      // In live-through regions, maximum RP decreases predictably; in the
      // using region it may or may not, so mark it for recomputation.
      LaneBitmask PrevMask = RegionLiveIns[Reg];
      RegionLiveIns.erase(Reg);
      RegMasks.insert({{I, Remat.RematMI->getOperand(0).getReg()}, PrevMask});
      if (Remat.UseMI->getParent() != DAG.Regions[I].first->getParent())
        DAG.Pressure[I].inc(Reg, PrevMask, LaneBitmask::getNone(), DAG.MRI);
      else
        RecomputeRP.insert(I);
    }

    // RP in the defining region may or may not decrease.
    ImpactedRegions.insert({DefRegion, DAG.Pressure[DefRegion]});
    RecomputeRP.insert(DefRegion);

    // Recompute the live interval to reflect the rematerialization.
    Register RematReg = Remat.RematMI->getOperand(0).getReg();
    DAG.LIS->removeInterval(RematReg);
    DAG.LIS->createAndComputeVirtRegInterval(RematReg);
  }

  // All impacted regions must be rescheduled; recompute maximum pressure
  // where it changed non-predictably.
  unsigned DynamicVGPRBlockSize = MFI.getDynamicVGPRBlockSize();
  AchievedOcc = MFI.getMaxWavesPerEU();
  for (auto &[I, OriginalRP] : ImpactedRegions) {
    bool IsEmptyRegion = DAG.Regions[I].first == DAG.Regions[I].second;
    RescheduleRegions[I] = !IsEmptyRegion;
    if (!RecomputeRP.contains(I))
      continue;

    GCNRegPressure RP;
    if (IsEmptyRegion) {
      RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
    } else {
      GCNDownwardRPTracker RPT(*DAG.LIS);
      auto *NonDbgMI = &*skipDebugInstructionsForward(DAG.Regions[I].first,
                                                      DAG.Regions[I].second);
      if (NonDbgMI == DAG.Regions[I].second) {
        // The region contains only debug instructions.
        RP = getRegPressure(DAG.MRI, DAG.LiveIns[I]);
      } else {
        RPT.reset(*NonDbgMI, &DAG.LiveIns[I]);
        RPT.advance(DAG.Regions[I].second);
        RP = RPT.moveMaxPressure();
      }
    }
    DAG.Pressure[I] = RP;
    AchievedOcc =
        std::min(AchievedOcc, RP.getOccupancy(ST, DynamicVGPRBlockSize));
  }
}
bool PreRARematStage::isReMaterializable(const MachineInstr &MI) {
  if (!DAG.TII->isReMaterializable(MI))
    return false;

  for (const MachineOperand &MO : MI.all_uses()) {
    if (MO.getReg().isVirtual())
      return false;

    // We can't remat physreg uses, unless it is a constant or an ignorable
    // use (e.g. the implicit exec use on VALU instructions).
    if (DAG.MRI.isConstantPhysReg(MO.getReg()) || DAG.TII->isIgnorableUse(MO))
      continue;

    return false;
  }

  return true;
}
void PreRARematStage::finalizeGCNSchedStage() {
  // Reducing spilling is always considered beneficial, so rematerializations
  // are only rolled back when we targeted an occupancy increase and missed.
  unsigned MaxOcc = std::max(AchievedOcc, DAG.MinOccupancy);
  if (!TargetOcc || MaxOcc >= *TargetOcc)
    return;

  REMAT_DEBUG(dbgs() << "Rolling back all rematerializations\n");
  const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();

  // Roll back the rematerializations.
  for (const auto &[DefMI, Remat] : Rematerializations) {
    MachineInstr &RematMI = *Remat.RematMI;
    unsigned DefRegion = MIRegion.at(DefMI);
    MachineBasicBlock::iterator InsertPos(DAG.Regions[DefRegion].second);
    MachineBasicBlock *MBB = RegionBB[DefRegion];
    Register Reg = RematMI.getOperand(0).getReg();

    // Re-rematerialize the MI at the end of its original region. It may not
    // land in exactly its original position, but that should not matter much.
    TII->reMaterialize(*MBB, InsertPos, Reg, AMDGPU::NoSubRegister, RematMI);
    MachineInstr *NewMI = &*std::prev(InsertPos);
    DAG.LIS->InsertMachineInstrInMaps(*NewMI);

    auto UseRegion = MIRegion.find(Remat.UseMI);
    if (UseRegion != MIRegion.end()) {
      DAG.updateRegionBoundaries(DAG.Regions[UseRegion->second], RematMI,
                                 nullptr);
    }
    DAG.updateRegionBoundaries(DAG.Regions[DefRegion], InsertPos, NewMI);

    // Erase the rematerialized MI.
    DAG.LIS->RemoveMachineInstrFromMaps(RematMI);
    RematMI.eraseFromParent();

    // Recompute the live interval for the restored register.
    DAG.LIS->removeInterval(Reg);
    DAG.LIS->createAndComputeVirtRegInterval(Reg);

    // Re-add the register as a live-in in all regions it used to be one in.
    for (unsigned LIRegion : Remat.LiveInRegions)
      DAG.LiveIns[LIRegion].insert({Reg, RegMasks.at({LIRegion, Reg})});
  }

  // Reset RP in all impacted regions.
  for (auto &[I, OriginalRP] : ImpactedRegions)
    DAG.Pressure[I] = OriginalRP;

  GCNSchedStage::finalizeGCNSchedStage();
}
void GCNScheduleDAGMILive::updateRegionBoundaries(
    RegionBoundaries &RegionBounds, MachineBasicBlock::iterator MI,
    MachineInstr *NewMI) {
  assert((!NewMI || NewMI != RegionBounds.second) &&
         "cannot remove at region end");

  if (RegionBounds.first == RegionBounds.second) {
    assert(NewMI && "cannot remove from an empty region");
    RegionBounds.first = NewMI;
    return;
  }

  // We only care about modifications at the beginning of the region; changes
  // in the middle or at the end have no impact on the boundaries.
  if (MI != RegionBounds.first)
    return;
  if (!NewMI)
    RegionBounds.first = std::next(MI); // Removal.
  else
    RegionBounds.first = NewMI; // Insertion.
}
void GCNPostScheduleDAGMILive::schedule() {
  // ...
  if (HasIGLPInstrs) {
    SavedMutations.clear();
    SavedMutations.swap(Mutations);
    addMutation(createIGroupLPDAGMutation(AMDGPU::SchedulingPhase::PostRA));
  }
  // ...
}