#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "machine-scheduler"

STATISTIC(NumClustered, "Number of load/store pairs clustered");

    cl::desc("Force top-down list scheduling"));
    cl::desc("Force bottom-up list scheduling"));
    cl::desc("Print critical path length to stdout"));
    cl::desc("Verify machine instrs before and after machine scheduling"));
    cl::desc("Pop up a window to show MISched dags after they are processed"));
    cl::desc("Dump resource usage at schedule boundary."));
    cl::desc("Show details of invoking getNextResourceCycle."));
#ifdef LLVM_ENABLE_DUMP
    cl::desc("Hide nodes with more predecessor/successor than cutoff"));
    cl::desc("Only schedule this function"));
    cl::desc("Only schedule this MBB#"));
    cl::desc("Enable memop clustering."),
    cl::desc("Switch to fast cluster algorithm with the loss "
             "of some fusion opportunities"),
    cl::desc("The threshold for fast cluster"),

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    cl::desc("Dump resource usage at schedule boundary."));
    cl::desc("Set width of the columns with "
             "the resources and schedule units"),
    cl::desc("Set width of the columns showing resource booking."),
    cl::desc("Sort the resources printed in the dump trace"));
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

class MachineScheduler : public MachineSchedulerBase {

class PostMachineScheduler : public MachineSchedulerBase {
  PostMachineScheduler();

char MachineScheduler::ID = 0;

                "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

char PostMachineScheduler::ID = 0;

                "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

    cl::desc("Machine instruction scheduler to use"));
    cl::desc("Enable the machine instruction scheduling pass."),
    cl::init(true),
    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),

  assert(I != Beg && "reached the top of the region, cannot decrement");
  if (!I->isDebugOrPseudoInstr())

  for (; I != End; ++I) {
    if (!I->isDebugOrPseudoInstr())
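// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// priorNonDebug/nextIfDebug above walk a basic block while skipping debug and
// pseudo instructions. The same skip-while-iterating pattern over a plain
// container, with a hypothetical SketchInstr::isDebug() flag standing in for
// MachineInstr::isDebugOrPseudoInstr():
#include <vector>
struct SketchInstr {
  bool IsDebug = false;
  bool isDebug() const { return IsDebug; }
};
using SketchIter = std::vector<SketchInstr>::const_iterator;
// Advance I past debug instructions, stopping at End.
static SketchIter sketchNextIfDebug(SketchIter I, SketchIter End) {
  while (I != End && I->isDebug())
    ++I;
  return I;
}
// ---- end sketch ----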
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  LIS = &getAnalysis<LiveIntervals>();

  MF->verify(this, "Before machine scheduling.");

  RegClassInfo->runOnMachineFunction(*MF);

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

  MF->verify(this, "After machine scheduling.");

  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MF->verify(this, "Before post machine scheduling.");

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

  MF->verify(this, "After post machine scheduling.");

  unsigned NumRegionInstrs;

      RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}

                            bool RegionsTopDown) {

       RegionEnd != MBB->begin(); RegionEnd = I) {
    if (RegionEnd != MBB->end() ||

    unsigned NumRegionInstrs = 0;
      if (!MI.isDebugOrPseudoInstr()) {

    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

    std::reverse(Regions.begin(), Regions.end());

  for (const SchedRegion &R : MBBRegions) {
    unsigned NumRegionInstrs = R.NumRegionInstrs;

    if (I == RegionEnd || I == std::prev(RegionEnd)) {

               else dbgs() << "End\n";
               dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

    errs() << MF->getName();

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";

    dbgs() << "*** Scheduling failed! ***\n";
    dbgs() << " has been released too many times!\n";

    dbgs() << "*** Scheduling failed! ***\n";
    dbgs() << " has been released too many times!\n";
                            unsigned regioninstrs)

#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
  ++NumInstrsScheduled;

    bool IsTopNode = false;
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

    dbgs() << "*** Final schedule for "

    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
    SU.biasCriticalPath();
    if (!SU.NumPredsLeft)
    if (!SU.NumSuccsLeft)

  for (SUnit *SU : TopRoots)

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << " * Schedule table (TopDown):\n";
  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
      dbgs() << "Missing SUnit\n";
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    unsigned C = FirstCycle;
    for (; C <= LastCycle; ++C) {
          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
      const std::string ResName =
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
    while (C++ <= LastCycle)

  dbgs() << " * Schedule table (BottomUp):\n";
      if ((int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1 < LastCycle)
        LastCycle = (int)SU->BotReadyCycle - PI->ReleaseAtCycle + 1;
  for (int C = FirstCycle; C >= LastCycle; --C)
      dbgs() << "Missing SUnit\n";
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    for (; C >= LastCycle; --C) {
          return LHS.AcquireAtCycle < RHS.AcquireAtCycle ||
                 (LHS.AcquireAtCycle == RHS.AcquireAtCycle &&
                  LHS.ReleaseAtCycle < RHS.ReleaseAtCycle);
      const std::string ResName =
      for (unsigned I = 0, E = PI.ReleaseAtCycle - PI.AcquireAtCycle; I != E;
    while (C-- >= LastCycle)

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    dbgs() << "* Schedule table (Bidirectional): not implemented\n";
      dbgs() << "Missing SUnit\n";
    if (!Reg.isVirtual())
    bool FoundDef = false;
      if (MO2.getReg() == Reg && !MO2.isDead()) {

                                unsigned regioninstrs)

         "ShouldTrackLaneMasks requires ShouldTrackPressure");

             dbgs() << "Bottom Pressure:\n";

         "Can't find the region bottom");

    const std::vector<unsigned> &NewMaxPressure) {
    unsigned ID = PC.getPSet();
        && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    if (NewMaxPressure[ID] >= Limit - 2) {
                 << NewMaxPressure[ID]
                 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

    if (!Reg.isVirtual())
    bool Decrement = P.LaneMask.any();
      SUnit &SU = *V2SU.SU;

    assert(VNI && "No live value at use.");
      SUnit *SU = V2SU.SU;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    dbgs() << " Pressure Diff : ";
    dbgs() << " Single Issue : ";

    bool IsTopNode = false;
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

    dbgs() << "*** Final schedule for "

  unsigned MaxCyclicLatency = 0;
    if (!Reg.isVirtual())
    unsigned LiveOutHeight = DefSU->getHeight();
      SUnit *SU = V2SU.SU;
      unsigned CyclicLatency = 0;
        CyclicLatency = LiveOutDepth - SU->getDepth();
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;
                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;
  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
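// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// The cyclic-latency computation above takes, per live-out register, the
// depth-based estimate (LiveOutDepth - use depth) and caps it with the
// height-based bound (LiveInHeight - LiveOutHeight). A standalone sketch of
// that min() computation, with invented example numbers in the comment:
#include <algorithm>
static unsigned sketchCyclicLatency(unsigned LiveOutDepth, unsigned UseDepth,
                                    unsigned LiveInHeight,
                                    unsigned LiveOutHeight) {
  unsigned Latency = LiveOutDepth > UseDepth ? LiveOutDepth - UseDepth : 0;
  if (LiveInHeight > LiveOutHeight)
    Latency = std::min(Latency, LiveInHeight - LiveOutHeight);
  else
    Latency = 0;
  // e.g. (LiveOutDepth=10, UseDepth=4, LiveInHeight=8, LiveOutHeight=5)
  // gives min(6, 3) = 3 cycles.
  return Latency;
}
// ---- end sketch ----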
    if (&*priorII == MI)

                 int64_t Offset, unsigned Width)
      : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),

      if (A->getType() != B->getType())
        return A->getType() < B->getType();
        return A->getReg() < B->getReg();
        return StackGrowsDown ? A->getIndex() > B->getIndex()
                              : A->getIndex() < B->getIndex();

    if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
                                     RHS.BaseOps.begin(), RHS.BaseOps.end(),
    if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
                                     BaseOps.begin(), BaseOps.end(), Compare))

      : TII(tii), TRI(tri), IsLoad(IsLoad) {}

  void collectMemOpRecords(std::vector<SUnit> &SUnits,

class StoreClusterMutation : public BaseMemOpClusterMutation {
      : BaseMemOpClusterMutation(tii, tri, false) {}

class LoadClusterMutation : public BaseMemOpClusterMutation {
      : BaseMemOpClusterMutation(tii, tri, true) {}

std::unique_ptr<ScheduleDAGMutation>
std::unique_ptr<ScheduleDAGMutation>

void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    auto MemOpa = MemOpRecords[Idx];

    unsigned NextIdx = Idx + 1;
    for (; NextIdx < End; ++NextIdx)
      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))

    auto MemOpb = MemOpRecords[NextIdx];
    unsigned ClusterLength = 2;
    unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
      CurrentClusterBytes =
          SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;

    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
                                  CurrentClusterBytes))

    SUnit *SUa = MemOpa.SU;
    SUnit *SUb = MemOpb.SU;

    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
                                             CurrentClusterBytes};
               << ", Curr cluster bytes: " << CurrentClusterBytes

void BaseMemOpClusterMutation::collectMemOpRecords(
  for (auto &SU : SUnits) {
    bool OffsetIsScalable;
                                           OffsetIsScalable, Width, TRI)) {
                 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
                 << ", Width: " << Width << "\n");
      for (const auto *Op : BaseOps)

bool BaseMemOpClusterMutation::groupMemOps(
  for (const auto &MemOp : MemOps) {
    unsigned ChainPredID = DAG->SUnits.size();
      for (const SDep &Pred : MemOp.SU->Preds) {

  collectMemOpRecords(DAG->SUnits, MemOpRecords);

  if (MemOpRecords.size() < 2)

  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);

  for (auto &Group : Groups) {
    clusterNeighboringMemOps(Group.second, FastCluster, DAG);
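// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// The cluster bookkeeping above grows ClusterLength by one and
// CurrentClusterBytes by the new operation's width whenever a memop joins an
// existing cluster, starting from {2, WidthA + WidthB} for a fresh pair. A
// simplified standalone version of that update (plain types stand in for the
// real SUnit/cluster-info maps):
#include <map>
struct SketchClusterInfo {
  unsigned Length;
  unsigned Bytes;
};
static SketchClusterInfo
sketchJoinCluster(const std::map<unsigned, SketchClusterInfo> &Clusters,
                  unsigned LeaderNode, unsigned WidthA, unsigned WidthB) {
  auto It = Clusters.find(LeaderNode);
  if (It == Clusters.end())
    return {2, WidthA + WidthB};                             // fresh pair
  return {It->second.Length + 1, It->second.Bytes + WidthB}; // extend cluster
}
// ---- end sketch ----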
std::unique_ptr<ScheduleDAGMutation>
  return std::make_unique<CopyConstrain>(TII, TRI);

  unsigned LocalReg = SrcReg;
  unsigned GlobalReg = DstReg;
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

  if (GlobalSegment == GlobalLI->end())

  if (GlobalSegment->contains(LocalLI->beginIndex()))

    if (GlobalSegment == GlobalLI->end())

  if (GlobalSegment != GlobalLI->begin()) {
                      GlobalSegment->start)) {

    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");

  for (const SDep &Succ : LastLocalSU->Succs) {

  for (const SDep &Pred : GlobalSU->Preds) {
    if (Pred.getSUnit() == FirstLocalSU)

  for (SUnit *LU : LocalUses) {
    LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
                      << GlobalSU->NodeNum << ")\n");
  for (SUnit *GU : GlobalUses) {
    LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
                      << FirstLocalSU->NodeNum << ")\n");

  if (FirstPos == DAG->end())

                               unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
    return ResCntFactor >= (int)LFactor;
    return ResCntFactor > (int)LFactor;
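// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// checkResourceLimit above declares a zone resource limited once the scaled
// resource count exceeds Latency * LFactor by at least one latency factor.
// The same comparison as a standalone function, with an example in a comment:
static bool sketchResourceLimited(unsigned LFactor, unsigned Count,
                                  unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
  // e.g. LFactor=2, Count=14, Latency=5: ResCntFactor = 14 - 10 = 4 >= 2,
  // so the zone counts as resource limited.
  return AfterSchedNode ? ResCntFactor >= (int)LFactor
                        : ResCntFactor > (int)LFactor;
}
// ---- end sketch ----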
  CheckPending = false;
  MinReadyCycle = std::numeric_limits<unsigned>::max();
  ExpectedLatency = 0;
  DependentLatency = 0;
  MaxExecutedResCount = 0;
  IsResourceLimited = false;
  ReservedCycles.clear();
  ReservedResourceSegments.clear();
  ReservedCyclesIndex.clear();
  ResourceGroupSubUnitMasks.clear();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  MaxObservedStall = 0;
  ExecutedResCounts.resize(1);
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

    unsigned PIdx = PI->ProcResourceIdx;
    assert(PI->ReleaseAtCycle >= PI->AcquireAtCycle);
        (Factor * (PI->ReleaseAtCycle - PI->AcquireAtCycle));

    ReservedCyclesIndex.resize(ResourceCount);
    ExecutedResCounts.resize(ResourceCount);
    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
    unsigned NumUnits = 0;

    for (unsigned i = 0; i < ResourceCount; ++i) {
      ReservedCyclesIndex[i] = NumUnits;
          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);

  if (ReadyCycle > CurrCycle)
    return ReadyCycle - CurrCycle;

                                                   unsigned ReleaseAtCycle,
                                                   unsigned AcquireAtCycle) {
      return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
          CurrCycle, AcquireAtCycle, ReleaseAtCycle);
    return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
        CurrCycle, AcquireAtCycle, ReleaseAtCycle);

  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
    NextUnreserved = std::max(CurrCycle, NextUnreserved + ReleaseAtCycle);
  return NextUnreserved;

std::pair<unsigned, unsigned>
                                    unsigned ReleaseAtCycle,
                                    unsigned AcquireAtCycle) {
    LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
  LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");

  unsigned InstanceIdx = 0;
  unsigned StartIndex = ReservedCyclesIndex[PIdx];
  assert(NumberOfInstances > 0 &&
         "Cannot have zero instances of a ProcResource");

        if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
                  StartIndex, ReleaseAtCycle, AcquireAtCycle),

    for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
      unsigned NextUnreserved, NextInstanceIdx;
      std::tie(NextUnreserved, NextInstanceIdx) =
      if (MinNextUnreserved > NextUnreserved) {
        InstanceIdx = NextInstanceIdx;
        MinNextUnreserved = NextUnreserved;
    return std::make_pair(MinNextUnreserved, InstanceIdx);

  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
    unsigned NextUnreserved =
                      << NextUnreserved << "c\n");
    if (MinNextUnreserved > NextUnreserved) {
      MinNextUnreserved = NextUnreserved;

                    << "[" << InstanceIdx - StartIndex << "]"
                    << " available @" << MinNextUnreserved << "c"
  return std::make_pair(MinNextUnreserved, InstanceIdx);
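// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// getNextResourceCycle above scans every instance of a processor resource and
// returns the earliest cycle at which one becomes free, together with the
// index of that instance. The core "min over instances" scan, with a plain
// vector of per-instance reservation cycles standing in for the real
// bookkeeping:
#include <algorithm>
#include <utility>
#include <vector>
static std::pair<unsigned, unsigned>
sketchEarliestInstance(const std::vector<unsigned> &ReservedUntil,
                       unsigned CurrCycle) {
  unsigned BestCycle = ~0u, BestIdx = 0;
  for (unsigned I = 0, E = (unsigned)ReservedUntil.size(); I != E; ++I) {
    unsigned Free = std::max(CurrCycle, ReservedUntil[I]);
    if (Free < BestCycle) {
      BestCycle = Free;
      BestIdx = I;
    }
  }
  return {BestCycle, BestIdx};
}
// ---- end sketch ----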
                      << (isTop() ? "begin" : "end") << " group\n");

      unsigned ResIdx = PE.ProcResourceIdx;
      unsigned ReleaseAtCycle = PE.ReleaseAtCycle;
      unsigned AcquireAtCycle = PE.AcquireAtCycle;
      unsigned NRCycle, InstanceIdx;
      std::tie(NRCycle, InstanceIdx) =
      if (NRCycle > CurrCycle) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
        MaxObservedStall = std::max(ReleaseAtCycle, MaxObservedStall);
                   << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
                   << "=" << NRCycle << "c\n");

  SUnit *LateSU = nullptr;
  unsigned RemLatency = 0;
  for (SUnit *SU : ReadySUs) {
    if (L > RemLatency) {
                      << LateSU->NodeNum << ") " << RemLatency << "c\n");

       PIdx != PEnd; ++PIdx) {
    if (OtherCount > OtherCritCount) {
      OtherCritCount = OtherCount;
      OtherCritIdx = PIdx;
  return OtherCritCount;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  if (ReadyCycle > CurrCycle)
    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
  if (!HazardDetected) {

    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
           "MinReadyCycle uninitialized");
    if (MinReadyCycle > NextCycle)
      NextCycle = MinReadyCycle;

  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

  if ((NextCycle - CurrCycle) > DependentLatency)
    DependentLatency = 0;
    DependentLatency -= (NextCycle - CurrCycle);

    CurrCycle = NextCycle;
    for (; CurrCycle != NextCycle; ++CurrCycle) {
  CheckPending = true;

  ExecutedResCounts[PIdx] += Count;
  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    MaxExecutedResCount = ExecutedResCounts[PIdx];

                                      unsigned ReleaseAtCycle,
                                      unsigned AcquireAtCycle) {
  unsigned Count = Factor * (ReleaseAtCycle - AcquireAtCycle);
                    << ReleaseAtCycle << "x" << Factor << "u\n");

    ZoneCritResIdx = PIdx;

    unsigned NextAvailable, InstanceIdx;
    std::tie(NextAvailable, InstanceIdx) =
    if (NextAvailable > CurrCycle) {
                 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
                 << " reserved until @" << NextAvailable << "\n");
  return NextAvailable;

  CheckPending = true;

         "Cannot schedule this instruction's MicroOps in the current cycle.");

  unsigned NextCycle = CurrCycle;
  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
      NextCycle = ReadyCycle;

  RetiredMOps += IncMOps;

  if (ZoneCritResIdx) {
    unsigned ScaledMOps =

        countResource(SC, PI->ProcResourceIdx, PI->ReleaseAtCycle, NextCycle,
                      PI->AcquireAtCycle);
    if (RCycle > NextCycle)

      unsigned PIdx = PI->ProcResourceIdx;
        unsigned ReservedUntil, InstanceIdx;
            SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
          ReservedResourceSegments[InstanceIdx].add(
                  NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
          ReservedResourceSegments[InstanceIdx].add(
                  NextCycle, PI->AcquireAtCycle, PI->ReleaseAtCycle),
        unsigned ReservedUntil, InstanceIdx;
            SC, PIdx, PI->ReleaseAtCycle, PI->AcquireAtCycle);
          ReservedCycles[InstanceIdx] =
              std::max(ReservedUntil, NextCycle + PI->ReleaseAtCycle);
          ReservedCycles[InstanceIdx] = NextCycle;

  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
                      << SU->NodeNum << ") " << TopLatency << "c\n");
                      << SU->NodeNum << ") " << BotLatency << "c\n");

  if (NextCycle > CurrCycle)

  CurrMOps += IncMOps;

    LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
                      << CurrCycle << '\n');

  MinReadyCycle = std::numeric_limits<unsigned>::max();
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

  CheckPending = false;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  unsigned StartIdx = 0;
  for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
    for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
      dbgs() << ResName << "(" << UnitIdx << ") = ";
      if (ReservedResourceSegments.count(StartIdx + UnitIdx))
        dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
        dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
    StartIdx += NumUnits;

  if (ZoneCritResIdx) {
    ResCount = RetiredMOps * ResFactor;
         << " Retired: " << RetiredMOps;
  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
         << ResCount / ResFactor << " "
         << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
         << (IsResourceLimited ? " - Resource" : " - Latency")

  RemLatency = std::max(RemLatency,
  RemLatency = std::max(RemLatency,

bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
                                               bool ComputeRemLatency,
                                               unsigned &RemLatency) const {
  if (ComputeRemLatency)

  unsigned OtherCritIdx = 0;
  unsigned OtherCount =

  bool OtherResLimited = false;
  unsigned RemLatency = 0;
  bool RemLatencyComputed = false;
    RemLatencyComputed = true;
                                         OtherCount, RemLatency, false);

  if (!OtherResLimited &&
      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
               << " RemainingLatency " << RemLatency << " + "

      dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
             << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
    }
    if (OtherResLimited)
      dbgs() << " RemainingLimit: "

             << " Latency limited both directions.\n");

  if (OtherResLimited)

  case NoCand:  return "NOCAND ";
  case Only1:   return "ONLY1 ";
  case PhysReg: return "PHYS-REG ";
  case Stall:   return "STALL ";
  case Cluster: return "CLUSTER ";
  case Weak:    return "WEAK ";
  case RegMax:  return "REG-MAX ";

  unsigned ResIdx = 0;

             << ":" << P.getUnitInc() << " ";

  if (TryVal < CandVal) {
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
  if (TryVal > CandVal) {
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
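// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// The tryLess/tryGreater fragments above implement the standard candidate
// tie-breaking pattern: prefer the smaller (or larger) value, record the
// reason on a win, and fall through on a tie so the next heuristic can
// decide. A simplified standalone version with stand-in types:
enum SketchReason { SketchNoCand, SketchRegExcess, SketchRegMax };
struct SketchCandidate {
  SketchReason Reason = SketchNoCand;
};
static bool sketchTryLess(int TryVal, int CandVal, SketchCandidate &TryCand,
                          SketchCandidate &Cand, SketchReason Reason) {
  if (TryVal < CandVal) {
    TryCand.Reason = Reason;   // the new candidate wins
    return true;
  }
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)
      Cand.Reason = Reason;    // the incumbent wins, but for a weaker reason
    return true;
  }
  return false;                // tie: defer to the next heuristic
}
// ---- end sketch ----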
3196 "(PreRA)GenericScheduler needs vreg liveness");
3201 if (RegionPolicy.ComputeDFSResult)
3202 DAG->computeDFSResult();
3213 if (!Top.HazardRec) {
3218 if (!Bot.HazardRec) {
3223 TopCand.SU =
nullptr;
3224 BotCand.SU =
nullptr;
3230 unsigned NumRegionInstrs) {
3237 RegionPolicy.ShouldTrackPressure =
true;
3238 for (
unsigned VT = MVT::i32; VT > (
unsigned)MVT::i1; --VT) {
3243 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3249 RegionPolicy.OnlyBottomUp =
true;
3256 RegionPolicy.ShouldTrackPressure =
false;
3257 RegionPolicy.ShouldTrackLaneMasks =
false;
3263 "-misched-topdown incompatible with -misched-bottomup");
3266 if (RegionPolicy.OnlyBottomUp)
3267 RegionPolicy.OnlyTopDown =
false;
3271 if (RegionPolicy.OnlyTopDown)
3272 RegionPolicy.OnlyBottomUp =
false;
3278#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3279 dbgs() <<
"GenericScheduler RegionPolicy: "
3280 <<
" ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3281 <<
" OnlyTopDown=" << RegionPolicy.OnlyTopDown
3282 <<
" OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3301 unsigned IterCount =
3307 unsigned InFlightCount =
3309 unsigned BufferLimit =
3315 dbgs() <<
"IssueCycles="
3318 <<
"c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3328 for (
const SUnit *SU : Bot.Available) {
3339 checkAcyclicLatency();
3366 if (TryPSet == CandPSet) {
3371 int TryRank = TryP.
isValid() ?
TRI->getRegPressureSetScore(MF, TryPSet) :
3372 std::numeric_limits<int>::max();
3374 int CandRank = CandP.
isValid() ?
TRI->getRegPressureSetScore(MF, CandPSet) :
3375 std::numeric_limits<int>::max();
3380 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3398 unsigned ScheduledOper = isTop ? 1 : 0;
3399 unsigned UnscheduledOper = isTop ? 0 : 1;
3402 if (
MI->getOperand(ScheduledOper).getReg().isPhysical())
3407 if (
MI->getOperand(UnscheduledOper).getReg().isPhysical())
3408 return AtBoundary ? -1 : 1;
3411 if (
MI->isMoveImmediate()) {
3417 if (
Op.isReg() && !
Op.getReg().isPhysical()) {
3424 return isTop ? -1 : 1;
3437 if (DAG->isTrackingPressure()) {
3442 DAG->getRegionCriticalPSets(),
3443 DAG->getRegPressure().MaxSetPressure);
3448 &DAG->getPressureDiff(Cand.
SU),
3450 DAG->getRegionCriticalPSets(),
3451 DAG->getRegPressure().MaxSetPressure);
3455 DAG->getPressureDiff(Cand.
SU),
3457 DAG->getRegionCriticalPSets(),
3458 DAG->getRegPressure().MaxSetPressure);
3463 <<
" Try SU(" << Cand.
SU->
NodeNum <<
") "
3512 bool SameBoundary = Zone !=
nullptr;
3533 const SUnit *CandNextClusterSU =
3535 const SUnit *TryCandNextClusterSU =
3538 Cand.
SU == CandNextClusterSU,
3546 TryCand, Cand,
Weak))
3598 for (
SUnit *SU : Q) {
3601 initCandidate(TryCand, SU, Zone.
isTop(), RPTracker, TempTracker);
3604 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3618 if (
SUnit *SU = Bot.pickOnlyChoice()) {
3623 if (
SUnit *SU = Top.pickOnlyChoice()) {
3639 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3640 BotCand.Policy != BotPolicy) {
3642 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3643 assert(BotCand.Reason !=
NoCand &&
"failed to find the first candidate");
3650 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3652 "Last pick result should correspond to re-picking right now");
3659 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3660 TopCand.Policy != TopPolicy) {
3662 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3663 assert(TopCand.Reason !=
NoCand &&
"failed to find the first candidate");
3670 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3672 "Last pick result should correspond to re-picking right now");
3678 assert(BotCand.isValid());
3679 assert(TopCand.isValid());
3682 if (tryCandidate(Cand, TopCand,
nullptr)) {
3687 IsTopNode = Cand.
AtTop;
3695 assert(Top.Available.empty() && Top.Pending.empty() &&
3696 Bot.Available.empty() && Bot.Pending.empty() &&
"ReadyQ garbage");
3701 if (RegionPolicy.OnlyTopDown) {
3702 SU = Top.pickOnlyChoice();
3705 TopCand.reset(NoPolicy);
3706 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3707 assert(TopCand.Reason !=
NoCand &&
"failed to find a candidate");
3712 }
else if (RegionPolicy.OnlyBottomUp) {
3713 SU = Bot.pickOnlyChoice();
3716 BotCand.reset(NoPolicy);
3717 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3718 assert(BotCand.Reason !=
NoCand &&
"failed to find a candidate");
3724 SU = pickNodeBidirectional(IsTopNode);
3729 Top.removeReady(SU);
3731 Bot.removeReady(SU);
3746 for (
SDep &Dep : Deps) {
3750 SUnit *DepSU = Dep.getSUnit();
3751 if (isTop ? DepSU->
Succs.size() > 1 : DepSU->
Preds.size() > 1)
3754 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3774 reschedulePhysReg(SU,
true);
3779 reschedulePhysReg(SU,
false);
3821 if (!Top.HazardRec) {
3832 for (
const SUnit *SU : BotRoots) {
3856 if (
tryLess(Top.getLatencyStallCycles(TryCand.
SU),
3857 Top.getLatencyStallCycles(Cand.
SU), TryCand, Cand,
Stall))
3891 for (
SUnit *SU : Q) {
3894 TryCand.
AtTop =
true;
3896 if (tryCandidate(Cand, TryCand)) {
3906 assert(Top.Available.empty() && Top.Pending.empty() &&
"ReadyQ garbage");
3911 SU = Top.pickOnlyChoice();
3920 pickNodeFromQueue(TopCand);
3928 Top.removeReady(SU);
3943 return new ScheduleDAGMI(
C, std::make_unique<PostGenericScheduler>(
C),
3956 const BitVector *ScheduledTrees =
nullptr;
3959 ILPOrder(
bool MaxILP) : MaximizeILP(MaxILP) {}
3964 bool operator()(
const SUnit *
A,
const SUnit *
B)
const {
3967 if (SchedTreeA != SchedTreeB) {
3969 if (ScheduledTrees->
test(SchedTreeA) != ScheduledTrees->
test(SchedTreeB))
3970 return ScheduledTrees->
test(SchedTreeB);
3991 std::vector<SUnit*> ReadyQ;
3994 ILPScheduler(
bool MaximizeILP) :
Cmp(MaximizeILP) {}
4005 void registerRoots()
override {
4007 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4014 SUnit *pickNode(
bool &IsTopNode)
override {
4015 if (ReadyQ.empty())
return nullptr;
4016 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4017 SUnit *SU = ReadyQ.back();
4021 <<
"SU(" << SU->
NodeNum <<
") "
4028 <<
"Scheduling " << *SU->
getInstr());
4033 void scheduleTree(
unsigned SubtreeID)
override {
4034 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
4039 void schedNode(
SUnit *SU,
bool IsTopNode)
override {
4040 assert(!IsTopNode &&
"SchedDFSResult needs bottom-up");
4043 void releaseTopNode(
SUnit *)
override { }
4045 void releaseBottomNode(
SUnit *SU)
override {
4046 ReadyQ.push_back(SU);
4047 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
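// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// ILPScheduler above keeps its ready queue as a std::vector maintained with
// std::make_heap / std::pop_heap / std::push_heap. The same pattern on plain
// ints, with the default std::less standing in for the ILPOrder comparator:
#include <algorithm>
#include <vector>
static int sketchPopBest(std::vector<int> &ReadyQ) {
  std::pop_heap(ReadyQ.begin(), ReadyQ.end()); // move the best element to the back
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  return Best;
}
static void sketchRelease(std::vector<int> &ReadyQ, int SU) {
  ReadyQ.push_back(SU);
  std::push_heap(ReadyQ.begin(), ReadyQ.end()); // restore the heap property
}
// ---- end sketch ----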
template<bool IsReverse>
    return A->NodeNum > B->NodeNum;
  return A->NodeNum < B->NodeNum;

  InstructionShuffler(bool alternate, bool topdown)
      : IsAlternating(alternate), IsTopDown(topdown) {}

  SUnit *pickNode(bool &IsTopNode) override {
      if (TopQ.empty()) return nullptr;
      if (BottomQ.empty()) return nullptr;
      IsTopDown = !IsTopDown;

  void schedNode(SUnit *SU, bool IsTopNode) override {}

  void releaseTopNode(SUnit *SU) override {
  void releaseBottomNode(SUnit *SU) override {

         "-misched-topdown incompatible with -misched-bottomup");
      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));

               "shuffle", "Shuffle machine instructions alternating directions",

  return std::string(G->MF.getName());

    return "color=cyan,style=dashed";
    return "color=blue,style=dashed";

  return G->getGraphNodeLabel(SU);

  std::string Str("shape=Mrecord");
    Str += ",style=filled,fillcolor=\"#";

  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";

  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());

  return A.first < B.first;

unsigned ResourceSegments::getFirstAvailableAt(
    unsigned CurrCycle, unsigned AcquireAtCycle, unsigned Cycle,
        IntervalBuilder) const {
  assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
         "Cannot execute on an un-sorted set of intervals.");
  unsigned RetCycle = CurrCycle;
      IntervalBuilder(RetCycle, AcquireAtCycle, Cycle);
  for (auto &Interval : _Intervals) {
           "Invalid intervals configuration.");
    NewInterval = IntervalBuilder(RetCycle, AcquireAtCycle, Cycle);

                           const unsigned CutOff) {
  assert(A.first < A.second && "Cannot add empty resource usage");
  assert(CutOff > 0 && "0-size interval history has no use.");
         "A resource is being overwritten");
  _Intervals.push_back(A);

  while (_Intervals.size() > CutOff)
    _Intervals.pop_front();

  assert(A.first <= A.second && "Invalid interval");
  assert(B.first <= B.second && "Invalid interval");

  if ((A.first == B.first) || (A.second == B.second))

  if ((A.first > B.first) && (A.second < B.second))

  if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))

  if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
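// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// The intersects() cases above enumerate shared boundaries and left/right/full
// overlap separately. If the intervals are treated as non-empty half-open
// [start, end) ranges, the same overlap test can be written as a single
// comparison:
#include <utility>
using SketchIntervalTy = std::pair<unsigned, unsigned>;
static bool sketchOverlaps(SketchIntervalTy A, SketchIntervalTy B) {
  return A.first < B.second && B.first < A.second;
}
// ---- end sketch ----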
void ResourceSegments::sortAndMerge() {
  if (_Intervals.size() <= 1)

  auto next = std::next(std::begin(_Intervals));
  auto E = std::end(_Intervals);
  for (; next != E; ++next) {
    if (std::prev(next)->second >= next->first) {
      next->first = std::prev(next)->first;
      _Intervals.erase(std::prev(next));
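// ---- Illustrative sketch (editor's addition, not part of MachineScheduler.cpp) ----
// sortAndMerge above walks a start-sorted list and coalesces neighbours that
// touch or overlap. A standalone version of that coalescing step on a plain
// std::list of (start, end) pairs, extending the surviving interval to cover
// both endpoints:
#include <algorithm>
#include <iterator>
#include <list>
#include <utility>
using SketchInterval = std::pair<unsigned, unsigned>;
static void sketchMergeSorted(std::list<SketchInterval> &Intervals) {
  if (Intervals.size() <= 1)
    return;
  for (auto Next = std::next(Intervals.begin()); Next != Intervals.end();
       ++Next) {
    if (std::prev(Next)->second >= Next->first) {
      // Overlapping or touching: fold the previous interval into this one.
      Next->first = std::prev(Next)->first;
      Next->second = std::max(std::prev(Next)->second, Next->second);
      Intervals.erase(std::prev(Next));
    }
  }
}
// ---- end sketch ----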