#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "machine-scheduler"

STATISTIC(NumClustered, "Number of load/store pairs clustered");

    cl::desc("Force top-down list scheduling"));
    cl::desc("Force bottom-up list scheduling"));
    cl::desc("Print critical path length to stdout"));
    cl::desc("Verify machine instrs before and after machine scheduling"));
    cl::desc("Pop up a window to show MISched dags after they are processed"));
    cl::desc("Dump resource usage at schedule boundary."));

#ifdef LLVM_ENABLE_DUMP
    cl::desc("Hide nodes with more predecessor/successor than cutoff"));
    cl::desc("Only schedule this function"));
    cl::desc("Only schedule this MBB#"));
    cl::desc("Enable memop clustering."),
    cl::desc("Switch to fast cluster algorithm with the loss "
             "of some fusion opportunities"),
    cl::desc("The threshold for fast cluster"),

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    cl::desc("Dump resource usage at schedule boundary."));
    cl::desc("Set width of the columns with "
             "the resources and schedule units"),
    cl::desc("Set width of the columns showing resource booking."),
void MachineSchedStrategy::anchor() {}

void ScheduleDAGMutation::anchor() {}

class MachineScheduler : public MachineSchedulerBase {

class PostMachineScheduler : public MachineSchedulerBase {
  PostMachineScheduler();

char MachineScheduler::ID = 0;

                    "Machine Instruction Scheduler", false, false)

MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {

void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {

char PostMachineScheduler::ID = 0;

                    "PostRA Machine Instruction Scheduler", false, false)

PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {

void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
    cl::desc("Machine instruction scheduler to use"));

    cl::desc("Enable the machine instruction scheduling pass."),
    cl::init(true),

    "enable-post-misched",
    cl::desc("Enable the post-ra machine instruction scheduling pass."),

  assert(I != Beg && "reached the top of the region, cannot decrement");
  if (!I->isDebugOrPseudoInstr())

  for (; I != End; ++I) {
    if (!I->isDebugOrPseudoInstr())
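
// The fragments below come from MachineScheduler::runOnMachineFunction and
// PostMachineScheduler::runOnMachineFunction. In outline, they fetch the
// analyses the scheduler depends on (loop info, dominator tree, pass config,
// alias analysis and, pre-RA, live intervals), optionally verify the machine
// function, then build a ScheduleDAGInstrs and schedule each region.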
  MLI = &getAnalysis<MachineLoopInfo>();
  MDT = &getAnalysis<MachineDominatorTree>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  LIS = &getAnalysis<LiveIntervals>();

    MF->verify(this, "Before machine scheduling.");
  RegClassInfo->runOnMachineFunction(*MF);

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());

    MF->verify(this, "After machine scheduling.");

  MLI = &getAnalysis<MachineLoopInfo>();
  PassConfig = &getAnalysis<TargetPassConfig>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

    MF->verify(this, "Before post machine scheduling.");

  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());

    MF->verify(this, "After post machine scheduling.");
  unsigned NumRegionInstrs;

      : RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
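
// getSchedRegions (fragments below): walk the block bottom-up, cutting a new
// region at each scheduling boundary and counting only non-debug/pseudo
// instructions per region; regions that end up empty are dropped, and when
// the target schedules regions top-down the collected list is reversed at
// the end.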
                            bool RegionsTopDown) {

      RegionEnd != MBB->begin(); RegionEnd = I) {
    if (RegionEnd != MBB->end() ||

    unsigned NumRegionInstrs = 0;
      if (!MI.isDebugOrPseudoInstr()) {

    if (NumRegionInstrs != 0)
      Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));

    std::reverse(Regions.begin(), Regions.end());

  for (const SchedRegion &R : MBBRegions) {
    unsigned NumRegionInstrs = R.NumRegionInstrs;

    if (I == RegionEnd || I == std::prev(RegionEnd)) {

               else dbgs() << "End\n";
               dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');

    errs() << MF->getName();
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << "Queue " << Name << ": ";
  for (const SUnit *SU : Queue)
    dbgs() << SU->NodeNum << " ";

    dbgs() << "*** Scheduling failed! ***\n";
    dbgs() << " has been released too many times!\n";

    dbgs() << "*** Scheduling failed! ***\n";
    dbgs() << " has been released too many times!\n";

                             unsigned regioninstrs)

#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
  ++NumInstrsScheduled;
    bool IsTopNode = false;
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");

    dbgs() << "*** Final schedule for "

    assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
    SU.biasCriticalPath();
    if (!SU.NumPredsLeft)
    if (!SU.NumSuccsLeft)

  for (SUnit *SU : TopRoots)

  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
    std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  dbgs() << " * Schedule table (TopDown):\n";
  for (unsigned C = FirstCycle; C <= LastCycle; ++C)
      dbgs() << "Missing SUnit\n";
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    unsigned C = FirstCycle;
    for (; C <= LastCycle; ++C) {
      const std::string ResName =
      for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
    while (C++ <= LastCycle)

  dbgs() << " * Schedule table (BottomUp):\n";
  for (int C = FirstCycle; C >= LastCycle; --C)
      dbgs() << "Missing SUnit\n";
    std::string NodeName("SU(");
    NodeName += std::to_string(SU->NodeNum) + ")";
    for (; C >= LastCycle; --C) {
      const std::string ResName =
      for (unsigned i = 0; i < PI->Cycles; ++i, --C)
    while (C-- >= LastCycle)

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    dbgs() << "* Schedule table (Bidirectional): not implemented\n";
      dbgs() << "Missing SUnit\n";
    if (!Reg.isVirtual())
    bool FoundDef = false;
      if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {

                                unsigned regioninstrs)

         "ShouldTrackLaneMasks requires ShouldTrackPressure");

             dbgs() << "Bottom Pressure:\n";
         "Can't find the region bottom");

                                const std::vector<unsigned> &NewMaxPressure) {
    unsigned ID = PC.getPSet();
        && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
    if (NewMaxPressure[ID] >= Limit - 2) {
                      << NewMaxPressure[ID]
                      << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")

    if (!Reg.isVirtual())
    bool Decrement = P.LaneMask.any();
      SUnit &SU = *V2SU.SU;
    assert(VNI && "No live value at use.");
      SUnit *SU = V2SU.SU;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    dbgs() << " Pressure Diff : ";
    dbgs() << " Single Issue : ";

    bool IsTopNode = false;
    LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");

    dbgs() << "*** Final schedule for "
  unsigned MaxCyclicLatency = 0;
    if (!Reg.isVirtual())
    unsigned LiveOutHeight = DefSU->getHeight();
      SUnit *SU = V2SU.SU;
      unsigned CyclicLatency = 0;
        CyclicLatency = LiveOutDepth - SU->getDepth();
      if (LiveInHeight > LiveOutHeight) {
        if (LiveInHeight - LiveOutHeight < CyclicLatency)
          CyclicLatency = LiveInHeight - LiveOutHeight;

                        << SU->NodeNum << ") = " << CyclicLatency << "c\n");
      if (CyclicLatency > MaxCyclicLatency)
        MaxCyclicLatency = CyclicLatency;

  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
  return MaxCyclicLatency;
    if (&*priorII == MI)

              int64_t Offset, unsigned Width)
        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),

      if (A->getType() != B->getType())
        return A->getType() < B->getType();
        return A->getReg() < B->getReg();
        return StackGrowsDown ? A->getIndex() > B->getIndex()
                              : A->getIndex() < B->getIndex();

    if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
                                     RHS.BaseOps.begin(), RHS.BaseOps.end(),
    if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
                                     BaseOps.begin(), BaseOps.end(), Compare))

      : TII(tii), TRI(tri), IsLoad(IsLoad) {}
  void collectMemOpRecords(std::vector<SUnit> &SUnits,

class StoreClusterMutation : public BaseMemOpClusterMutation {
      : BaseMemOpClusterMutation(tii, tri, false) {}

class LoadClusterMutation : public BaseMemOpClusterMutation {
      : BaseMemOpClusterMutation(tii, tri, true) {}

std::unique_ptr<ScheduleDAGMutation>

std::unique_ptr<ScheduleDAGMutation>
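
// clusterNeighboringMemOps (fragments below): MemOpRecords arrives sorted by
// base operand and offset. For each record, the mutation looks ahead for the
// next record that is not already in a cluster and (unless the fast-cluster
// path is taken) is not reachable from it in the DAG, asks the target's
// shouldClusterMemOps whether the pair is worth fusing given the running
// cluster length and byte count, and if so adds a cluster edge and records
// the grown cluster for the trailing memop.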
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
    auto MemOpa = MemOpRecords[Idx];

    unsigned NextIdx = Idx + 1;
    for (; NextIdx < End; ++NextIdx)
      if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
           (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
            !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))

    auto MemOpb = MemOpRecords[NextIdx];
    unsigned ClusterLength = 2;
    unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
    if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
      ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
      CurrentClusterBytes =
          SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;

    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
                                  CurrentClusterBytes))

    SUnit *SUa = MemOpa.SU;
    SUnit *SUb = MemOpb.SU;

    SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
                                             CurrentClusterBytes};
                      << ", Curr cluster bytes: " << CurrentClusterBytes

void BaseMemOpClusterMutation::collectMemOpRecords(
  for (auto &SU : SUnits) {
    bool OffsetIsScalable;
                                           OffsetIsScalable, Width, TRI)) {
                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
                        << ", Width: " << Width << "\n");
      for (const auto *Op : BaseOps)

bool BaseMemOpClusterMutation::groupMemOps(
  for (const auto &MemOp : MemOps) {
    unsigned ChainPredID = DAG->SUnits.size();
      for (const SDep &Pred : MemOp.SU->Preds) {

  collectMemOpRecords(DAG->SUnits, MemOpRecords);

  if (MemOpRecords.size() < 2)

  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);

  for (auto &Group : Groups) {
    clusterNeighboringMemOps(Group.second, FastCluster, DAG);
std::unique_ptr<ScheduleDAGMutation>
  return std::make_unique<CopyConstrain>(TII, TRI);

  unsigned LocalReg = SrcReg;
  unsigned GlobalReg = DstReg;
  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))

  if (GlobalSegment == GlobalLI->end())

  if (GlobalSegment->contains(LocalLI->beginIndex()))

  if (GlobalSegment == GlobalLI->end())

  if (GlobalSegment != GlobalLI->begin()) {
                                  GlobalSegment->start)) {
    assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
           "Disconnected LRG within the scheduling region.");

  for (const SDep &Succ : LastLocalSU->Succs) {

  for (const SDep &Pred : GlobalSU->Preds) {
    if (Pred.getSUnit() == FirstLocalSU)

  for (SUnit *LU : LocalUses) {
    LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
                      << GlobalSU->NodeNum << ")\n");
  for (SUnit *GU : GlobalUses) {
    LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
                      << FirstLocalSU->NodeNum << ")\n");

  if (FirstPos == DAG->end())
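
// checkResourceLimit (fragments below): given a scaled resource Count and a
// Latency value, it compares Count against Latency * LFactor to decide
// whether a SchedBoundary has become resource limited rather than latency
// limited; AfterSchedNode selects whether the comparison is inclusive.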
                               unsigned Latency, bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - (Latency * LFactor));
    return ResCntFactor >= (int)LFactor;
    return ResCntFactor > (int)LFactor;

  CheckPending = false;
  MinReadyCycle = std::numeric_limits<unsigned>::max();
  ExpectedLatency = 0;
  DependentLatency = 0;
  MaxExecutedResCount = 0;
  IsResourceLimited = false;
  ReservedCycles.clear();
  ReservedCyclesIndex.clear();
  ResourceGroupSubUnitMasks.clear();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  MaxObservedStall = 0;

  ExecutedResCounts.resize(1);
  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");

      unsigned PIdx = PI->ProcResourceIdx;

    ReservedCyclesIndex.resize(ResourceCount);
    ExecutedResCounts.resize(ResourceCount);
    ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
    unsigned NumUnits = 0;

    for (unsigned i = 0; i < ResourceCount; ++i) {
      ReservedCyclesIndex[i] = NumUnits;
          ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);

  if (ReadyCycle > CurrCycle)
    return ReadyCycle - CurrCycle;

  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
    NextUnreserved += Cycles;
  return NextUnreserved;
std::pair<unsigned, unsigned>
  unsigned InstanceIdx = 0;
  unsigned StartIndex = ReservedCyclesIndex[PIdx];
  assert(NumberOfInstances > 0 &&
         "Cannot have zero instances of a ProcResource");

        if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
          return std::make_pair(0u, StartIndex);

    for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
      unsigned NextUnreserved, NextInstanceIdx;
      std::tie(NextUnreserved, NextInstanceIdx) =
      if (MinNextUnreserved > NextUnreserved) {
        InstanceIdx = NextInstanceIdx;
        MinNextUnreserved = NextUnreserved;
    return std::make_pair(MinNextUnreserved, InstanceIdx);

  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
    if (MinNextUnreserved > NextUnreserved) {
      MinNextUnreserved = NextUnreserved;
  return std::make_pair(MinNextUnreserved, InstanceIdx);
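
// checkHazard (fragments below): ReservedCycles records, per resource
// instance, the first cycle at which that instance is free again. In outline,
// the target's hazard recognizer is consulted first; the zone then checks
// whether issuing the instruction would exceed the issue width or violate a
// begin/end-group constraint, and whether any reserved resource it needs is
// booked past CurrCycle; observed stalls feed MaxObservedStall under
// ABI-breaking checks.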
                      << (isTop() ? "begin" : "end") << " group\n");

      unsigned ResIdx = PE.ProcResourceIdx;
      unsigned Cycles = PE.Cycles;
      unsigned NRCycle, InstanceIdx;
      if (NRCycle > CurrCycle) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
        MaxObservedStall = std::max(Cycles, MaxObservedStall);
                  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
                  << "=" << NRCycle << "c\n");

  SUnit *LateSU = nullptr;
  unsigned RemLatency = 0;
  for (SUnit *SU : ReadySUs) {
    if (L > RemLatency) {
                      << LateSU->NodeNum << ") " << RemLatency << "c\n");

       PIdx != PEnd; ++PIdx) {
    if (OtherCount > OtherCritCount) {
      OtherCritCount = OtherCount;
      OtherCritIdx = PIdx;
  return OtherCritCount;
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  if (ReadyCycle > CurrCycle)
    MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);

  if (ReadyCycle < MinReadyCycle)
    MinReadyCycle = ReadyCycle;

  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
  if (!HazardDetected) {

    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
           "MinReadyCycle uninitialized");
    if (MinReadyCycle > NextCycle)
      NextCycle = MinReadyCycle;

  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;

  if ((NextCycle - CurrCycle) > DependentLatency)
    DependentLatency = 0;
    DependentLatency -= (NextCycle - CurrCycle);

    CurrCycle = NextCycle;
    for (; CurrCycle != NextCycle; ++CurrCycle) {

  CheckPending = true;

  ExecutedResCounts[PIdx] += Count;
  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
    MaxExecutedResCount = ExecutedResCounts[PIdx];

                                      unsigned Cycles, unsigned NextCycle) {
  unsigned Count = Factor * Cycles;
                    << Cycles << "x" << Factor << "u\n");

    ZoneCritResIdx = PIdx;

  unsigned NextAvailable, InstanceIdx;
  if (NextAvailable > CurrCycle) {
                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
                      << " reserved until @" << NextAvailable << "\n");
  return NextAvailable;

  CheckPending = true;

         "Cannot schedule this instruction's MicroOps in the current cycle.");

  unsigned NextCycle = CurrCycle;
  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
    if (ReadyCycle > NextCycle) {
      NextCycle = ReadyCycle;
      LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
      NextCycle = ReadyCycle;

  RetiredMOps += IncMOps;

  if (ZoneCritResIdx) {
    unsigned ScaledMOps =

          countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
      if (RCycle > NextCycle)

        unsigned PIdx = PI->ProcResourceIdx;
          unsigned ReservedUntil, InstanceIdx;
          std::tie(ReservedUntil, InstanceIdx) =
            ReservedCycles[InstanceIdx] =
                std::max(ReservedUntil, NextCycle + PI->Cycles);
            ReservedCycles[InstanceIdx] = NextCycle;
  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
                      << SU->NodeNum << ") " << TopLatency << "c\n");
                      << SU->NodeNum << ") " << BotLatency << "c\n");

  if (NextCycle > CurrCycle)

  CurrMOps += IncMOps;

    LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
                      << CurrCycle << '\n');

  MinReadyCycle = std::numeric_limits<unsigned>::max();
    if (ReadyCycle < MinReadyCycle)
      MinReadyCycle = ReadyCycle;

  CheckPending = false;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  unsigned StartIdx = 0;

  for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
    for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
      dbgs() << ResName << "(" << UnitIdx
             << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n";
    StartIdx += NumUnits;

  if (ZoneCritResIdx) {
    ResCount = RetiredMOps * ResFactor;
         << " Retired: " << RetiredMOps;
  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
         << ResCount / ResFactor << " "
         << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
         << (IsResourceLimited ? " - Resource" : " - Latency")
  RemLatency = std::max(RemLatency,
  RemLatency = std::max(RemLatency,

bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
                                               bool ComputeRemLatency,
                                               unsigned &RemLatency) const {
  if (ComputeRemLatency)

  unsigned OtherCritIdx = 0;
  unsigned OtherCount =

  bool OtherResLimited = false;
  unsigned RemLatency = 0;
  bool RemLatencyComputed = false;
    RemLatencyComputed = true;
                                         OtherCount, RemLatency, false);

  if (!OtherResLimited &&
      (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
             << " RemainingLatency " << RemLatency << " + "

    dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
           << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
  } if (OtherResLimited) dbgs()
        << " RemainingLimit: "
        << " Latency limited both directions.\n");

  if (OtherResLimited)

  case NoCand:   return "NOCAND ";
  case Only1:    return "ONLY1 ";
  case PhysReg:  return "PHYS-REG ";
  case Stall:    return "STALL ";
  case Cluster:  return "CLUSTER ";
  case Weak:     return "WEAK ";
  case RegMax:   return "REG-MAX ";

  unsigned ResIdx = 0;
            << ":" << P.getUnitInc() << " ";

  if (TryVal < CandVal) {
  if (TryVal > CandVal) {
    if (Cand.Reason > Reason)

  if (TryVal > CandVal) {
  if (TryVal < CandVal) {
    if (Cand.Reason > Reason)
3089 "(PreRA)GenericScheduler needs vreg liveness");
3094 if (RegionPolicy.ComputeDFSResult)
3095 DAG->computeDFSResult();
3106 if (!Top.HazardRec) {
3111 if (!Bot.HazardRec) {
3116 TopCand.SU =
nullptr;
3117 BotCand.SU =
nullptr;
3123 unsigned NumRegionInstrs) {
3130 RegionPolicy.ShouldTrackPressure =
true;
3131 for (
unsigned VT = MVT::i32; VT > (
unsigned)MVT::i1; --VT) {
3136 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3142 RegionPolicy.OnlyBottomUp =
true;
3149 RegionPolicy.ShouldTrackPressure =
false;
3150 RegionPolicy.ShouldTrackLaneMasks =
false;
3156 "-misched-topdown incompatible with -misched-bottomup");
3159 if (RegionPolicy.OnlyBottomUp)
3160 RegionPolicy.OnlyTopDown =
false;
3164 if (RegionPolicy.OnlyTopDown)
3165 RegionPolicy.OnlyBottomUp =
false;
3171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3172 dbgs() <<
"GenericScheduler RegionPolicy: "
3173 <<
" ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3174 <<
" OnlyTopDown=" << RegionPolicy.OnlyTopDown
3175 <<
" OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3194 unsigned IterCount =
3200 unsigned InFlightCount =
3202 unsigned BufferLimit =
3208 dbgs() <<
"IssueCycles="
3211 <<
"c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3221 for (
const SUnit *SU : Bot.Available) {
3232 checkAcyclicLatency();
  if (TryPSet == CandPSet) {

  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet)
                               : std::numeric_limits<int>::max();
  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet)
                                 : std::numeric_limits<int>::max();

  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
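
// biasPhysReg (fragments below): in outline, COPYs that read or write a
// physical register are biased so they issue next to the region boundary on
// the side where the physical operand lives, keeping physreg live ranges
// short; qualifying move-immediates are biased the same way.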
  unsigned ScheduledOper = isTop ? 1 : 0;
  unsigned UnscheduledOper = isTop ? 0 : 1;
    if (MI->getOperand(ScheduledOper).getReg().isPhysical())
    if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
      return AtBoundary ? -1 : 1;

  if (MI->isMoveImmediate()) {
      if (Op.isReg() && !Op.getReg().isPhysical()) {

      return isTop ? -1 : 1;

  if (DAG->isTrackingPressure()) {
                                       DAG->getRegionCriticalPSets(),
                                       DAG->getRegPressure().MaxSetPressure);
                                    &DAG->getPressureDiff(Cand.SU),
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);
                                    DAG->getPressureDiff(Cand.SU),
                                    DAG->getRegionCriticalPSets(),
                                    DAG->getRegPressure().MaxSetPressure);

                    << " Try SU(" << Cand.SU->NodeNum << ") "

  bool SameBoundary = Zone != nullptr;

  const SUnit *CandNextClusterSU =
  const SUnit *TryCandNextClusterSU =
                 Cand.SU == CandNextClusterSU,
                 TryCand, Cand, Weak))

  for (SUnit *SU : Q) {
    initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
    if (tryCandidate(Cand, TryCand, ZoneArg)) {
  if (SUnit *SU = Bot.pickOnlyChoice()) {
  if (SUnit *SU = Top.pickOnlyChoice()) {

  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
      BotCand.Policy != BotPolicy) {
    pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
    assert(BotCand.Reason != NoCand && "failed to find the first candidate");
      pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
             "Last pick result should correspond to re-picking right now");

  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
      TopCand.Policy != TopPolicy) {
    pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
    assert(TopCand.Reason != NoCand && "failed to find the first candidate");
      pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
             "Last pick result should correspond to re-picking right now");

  assert(BotCand.isValid());
  assert(TopCand.isValid());

  if (tryCandidate(Cand, TopCand, nullptr)) {

  IsTopNode = Cand.AtTop;

    assert(Top.Available.empty() && Top.Pending.empty() &&
           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");

    if (RegionPolicy.OnlyTopDown) {
      SU = Top.pickOnlyChoice();
        TopCand.reset(NoPolicy);
        pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
        assert(TopCand.Reason != NoCand && "failed to find a candidate");
    } else if (RegionPolicy.OnlyBottomUp) {
      SU = Bot.pickOnlyChoice();
        BotCand.reset(NoPolicy);
        pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
        assert(BotCand.Reason != NoCand && "failed to find a candidate");
      SU = pickNodeBidirectional(IsTopNode);

    Top.removeReady(SU);
    Bot.removeReady(SU);

  for (SDep &Dep : Deps) {
    SUnit *DepSU = Dep.getSUnit();
    if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
    if (!Copy->isCopy() && !Copy->isMoveImmediate())

      reschedulePhysReg(SU, true);
      reschedulePhysReg(SU, false);
  if (!Top.HazardRec) {

  for (const SUnit *SU : BotRoots) {

  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))

  for (SUnit *SU : Q) {
    TryCand.AtTop = true;
    if (tryCandidate(Cand, TryCand)) {

  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");

  SU = Top.pickOnlyChoice();
    pickNodeFromQueue(TopCand);

  Top.removeReady(SU);

  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
  const BitVector *ScheduledTrees = nullptr;

  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}

  bool operator()(const SUnit *A, const SUnit *B) const {
    if (SchedTreeA != SchedTreeB) {
      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
        return ScheduledTrees->test(SchedTreeB);

  std::vector<SUnit*> ReadyQ;

  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}

  void registerRoots() override {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty()) return nullptr;
    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
    SUnit *SU = ReadyQ.back();
                      << "SU(" << SU->NodeNum << ") "
                      << "Scheduling " << *SU->getInstr());

  void scheduleTree(unsigned SubtreeID) override {
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);

  void schedNode(SUnit *SU, bool IsTopNode) override {
    assert(!IsTopNode && "SchedDFSResult needs bottom-up");

  void releaseTopNode(SUnit *) override { }

  void releaseBottomNode(SUnit *SU) override {
    ReadyQ.push_back(SU);
    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
template<bool IsReverse>
      return A->NodeNum > B->NodeNum;
      return A->NodeNum < B->NodeNum;

  InstructionShuffler(bool alternate, bool topdown)
      : IsAlternating(alternate), IsTopDown(topdown) {}

  SUnit *pickNode(bool &IsTopNode) override {
      if (TopQ.empty()) return nullptr;
      if (BottomQ.empty()) return nullptr;
      IsTopDown = !IsTopDown;

  void schedNode(SUnit *SU, bool IsTopNode) override {}

  void releaseTopNode(SUnit *SU) override {
  void releaseBottomNode(SUnit *SU) override {

         "-misched-topdown incompatible with -misched-bottomup");
      C, std::make_unique<InstructionShuffler>(Alternate, TopDown));

    "shuffle", "Shuffle machine instructions alternating directions",
    return std::string(G->MF.getName());

      return "color=cyan,style=dashed";
      return "color=blue,style=dashed";

    return G->getGraphNodeLabel(SU);

    std::string Str("shape=Mrecord");
      Str += ",style=filled,fillcolor=\"#";

  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
         << "systems with Graphviz or gv!\n";

  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());