73#include "llvm/Config/llvm-config.h"
103#define DEBUG_TYPE "pipeliner"
105STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
106STATISTIC(NumPipelined,
"Number of loops software pipelined");
107STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
108STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
109STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
110STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
111STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
112STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
113STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
114STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
115STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
119 cl::desc(
"Enable Software Pipelining"));
128 cl::desc(
"Size limit for the MII."),
134 cl::desc(
"Force pipeliner to use specified II."),
140 cl::desc(
"Maximum stages allowed in the generated scheduled."),
147 cl::desc(
"Prune dependences between unrelated Phi nodes."),
154 cl::desc(
"Prune loop carried order dependences."),
172 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
173 "with the generated schedule for feeding into the "
174 "-modulo-schedule-test pass"));
179 "Use the experimental peeling code generator for software pipelining"));
187 cl::desc(
"Limit register pressure of scheduled loop"));
192 cl::desc(
"Margin representing the unused percentage of "
193 "the register pressure limit"));
200 cl::desc(
"Enable CopyToPhi DAG Mutation"));
205 "pipeliner-force-issue-width",
211unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
219 "Modulo Software Pipelining",
false,
false)
229 if (skipFunction(mf.getFunction()))
235 if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
239 if (!mf.getSubtarget().enableMachinePipeliner())
244 if (mf.getSubtarget().useDFAforSMS() &&
245 (!mf.getSubtarget().getInstrItineraryData() ||
246 mf.getSubtarget().getInstrItineraryData()->isEmpty()))
250 MLI = &getAnalysis<MachineLoopInfo>();
251 MDT = &getAnalysis<MachineDominatorTree>();
252 ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
253 TII = MF->getSubtarget().getInstrInfo();
254 RegClassInfo.runOnMachineFunction(*MF);
256 for (
const auto &L : *MLI)
266bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
267 bool Changed =
false;
268 for (
const auto &InnerLoop : L)
269 Changed |= scheduleLoop(*InnerLoop);
281 setPragmaPipelineOptions(L);
282 if (!canPipelineLoop(L)) {
286 L.getStartLoc(), L.getHeader())
287 <<
"Failed to pipeline loop";
296 Changed = swingModuloScheduler(L);
302void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
321 if (LoopID ==
nullptr)
328 MDNode *MD = dyn_cast<MDNode>(MDO);
338 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
340 "Pipeline initiation interval hint metadata should have two operands.");
342 mdconst::extract<ConstantInt>(MD->
getOperand(1))->getZExtValue();
344 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
353bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
354 if (
L.getNumBlocks() != 1) {
357 L.getStartLoc(),
L.getHeader())
358 <<
"Not a single basic block: "
359 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
367 L.getStartLoc(),
L.getHeader())
368 <<
"Disabled by Pragma.";
379 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
383 L.getStartLoc(),
L.getHeader())
384 <<
"The branch can't be understood";
393 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
397 L.getStartLoc(),
L.getHeader())
398 <<
"The loop structure is not supported";
403 if (!
L.getLoopPreheader()) {
404 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
408 L.getStartLoc(),
L.getHeader())
409 <<
"No loop preheader found";
415 preprocessPhiNodes(*
L.getHeader());
421 SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
426 auto *RC =
MRI.getRegClass(DefOp.
getReg());
428 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
453bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
454 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
477 return SMS.hasNewSchedule();
490void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
493 else if (II_setByPragma > 0)
494 MII = II_setByPragma;
496 MII = std::max(ResMII, RecMII);
499void SwingSchedulerDAG::setMAX_II() {
502 else if (II_setByPragma > 0)
503 MAX_II = II_setByPragma;
513 addLoopCarriedDependences(AA);
514 updatePhiDependences();
521 findCircuits(NodeSets);
525 unsigned ResMII = calculateResMII();
526 unsigned RecMII = calculateRecMII(NodeSets);
534 setMII(ResMII, RecMII);
538 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
544 Pass.ORE->emit([&]() {
547 <<
"Invalid Minimal Initiation Interval: 0";
555 <<
", we don't pipeline large loops\n");
556 NumFailLargeMaxMII++;
557 Pass.ORE->emit([&]() {
560 <<
"Minimal Initiation Interval too large: "
561 <<
ore::NV(
"MII", (
int)MII) <<
" > "
563 <<
"Refer to -pipeliner-max-mii.";
568 computeNodeFunctions(NodeSets);
570 registerPressureFilter(NodeSets);
572 colocateNodeSets(NodeSets);
574 checkNodeSets(NodeSets);
577 for (
auto &
I : NodeSets) {
578 dbgs() <<
" Rec NodeSet ";
585 groupRemainingNodes(NodeSets);
587 removeDuplicateNodes(NodeSets);
590 for (
auto &
I : NodeSets) {
591 dbgs() <<
" NodeSet ";
596 computeNodeOrder(NodeSets);
599 checkValidNodeOrder(Circuits);
602 Scheduled = schedulePipeline(Schedule);
607 Pass.ORE->emit([&]() {
610 <<
"Unable to find schedule";
617 if (numStages == 0) {
620 Pass.ORE->emit([&]() {
623 <<
"No need to pipeline - no overlapped iterations in schedule.";
630 <<
" : too many stages, abort\n");
631 NumFailLargeMaxStage++;
632 Pass.ORE->emit([&]() {
635 <<
"Too many stages in schedule: "
636 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
638 <<
". Refer to -pipeliner-max-stages.";
643 Pass.ORE->emit([&]() {
646 <<
"Pipelined succesfully!";
651 std::vector<MachineInstr *> OrderedInsts;
655 OrderedInsts.push_back(SU->getInstr());
656 Cycles[SU->getInstr()] =
Cycle;
661 for (
auto &KV : NewMIs) {
662 Cycles[KV.first] = Cycles[KV.second];
663 Stages[KV.first] = Stages[KV.second];
664 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
671 "Cannot serialize a schedule with InstrChanges!");
690 for (
auto &KV : NewMIs)
701 unsigned &InitVal,
unsigned &LoopVal) {
702 assert(Phi.isPHI() &&
"Expecting a Phi.");
706 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
707 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
708 InitVal = Phi.getOperand(i).getReg();
710 LoopVal = Phi.getOperand(i).getReg();
712 assert(InitVal != 0 && LoopVal != 0 &&
"Unexpected Phi structure.");
718 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
719 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
720 return Phi.getOperand(i).getReg();
729 while (!Worklist.
empty()) {
731 for (
const auto &SI : SU->
Succs) {
732 SUnit *SuccSU = SI.getSUnit();
734 if (Visited.
count(SuccSU))
749 return MI.isCall() ||
MI.mayRaiseFPException() ||
750 MI.hasUnmodeledSideEffects() ||
751 (
MI.hasOrderedMemoryRef() &&
752 (!
MI.mayLoad() || !
MI.isDereferenceableInvariantLoad()));
760 if (!
MI->hasOneMemOperand())
766 for (
const Value *V : Objs) {
778void SwingSchedulerDAG::addLoopCarriedDependences(
AliasAnalysis *AA) {
785 PendingLoads.
clear();
786 else if (
MI.mayLoad()) {
791 for (
const auto *V : Objs) {
795 }
else if (
MI.mayStore()) {
800 for (
const auto *V : Objs) {
802 PendingLoads.
find(V);
803 if (
I == PendingLoads.
end())
805 for (
auto *Load :
I->second) {
813 int64_t Offset1, Offset2;
814 bool Offset1IsScalable, Offset2IsScalable;
816 Offset1IsScalable,
TRI) &&
818 Offset2IsScalable,
TRI)) {
820 Offset1IsScalable == Offset2IsScalable &&
821 (
int)Offset1 < (
int)Offset2) {
823 "What happened to the chain edge?");
874void SwingSchedulerDAG::updatePhiDependences() {
882 unsigned HasPhiUse = 0;
883 unsigned HasPhiDef = 0;
907 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
912 }
else if (MO.isUse()) {
915 if (
DefMI ==
nullptr)
922 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep);
928 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
937 for (
auto &PI :
I.Preds) {
940 if (
I.getInstr()->isPHI()) {
949 for (
int i = 0, e = RemoveDeps.
size(); i != e; ++i)
950 I.removePred(RemoveDeps[i]);
956void SwingSchedulerDAG::changeDependences() {
961 unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
962 int64_t NewOffset = 0;
963 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
968 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
988 for (
const SDep &
P :
I.Preds)
989 if (
P.getSUnit() == DefSU)
991 for (
int i = 0, e = Deps.
size(); i != e; i++) {
993 I.removePred(Deps[i]);
997 for (
auto &
P : LastSU->
Preds)
1000 for (
int i = 0, e = Deps.
size(); i != e; i++) {
1013 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1024 std::vector<MachineInstr *> &OrderedInsts,
1032 Stage <= LastStage; ++Stage) {
1035 Instrs[
Cycle].push_front(SU);
1042 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1044 for (
SUnit *SU : CycleInstrs) {
1046 OrderedInsts.push_back(
MI);
1056struct FuncUnitSorter {
1062 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1070 unsigned min = UINT_MAX;
1071 if (InstrItins && !InstrItins->
isEmpty()) {
1074 InstrItins->
endStage(SchedClass))) {
1077 if (numAlternatives < min) {
1078 min = numAlternatives;
1095 if (!PRE.ReleaseAtCycle)
1099 unsigned NumUnits = ProcResource->
NumUnits;
1100 if (NumUnits < min) {
1102 F = PRE.ProcResourceIdx;
1107 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1116 unsigned SchedClass =
MI.getDesc().getSchedClass();
1117 if (InstrItins && !InstrItins->
isEmpty()) {
1120 InstrItins->
endStage(SchedClass))) {
1123 Resources[FuncUnits]++;
1138 if (!PRE.ReleaseAtCycle)
1140 Resources[PRE.ProcResourceIdx]++;
1144 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1150 unsigned MFUs1 = minFuncUnits(IS1, F1);
1151 unsigned MFUs2 = minFuncUnits(IS2, F2);
1154 return MFUs1 > MFUs2;
1159class HighRegisterPressureDetector {
1165 const unsigned PSetNum;
1171 std::vector<unsigned> InitSetPressure;
1175 std::vector<unsigned> PressureSetLimit;
1182 using OrderedInstsTy = std::vector<MachineInstr *>;
1186 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1187 if (Pressures.size() == 0) {
1191 for (
unsigned P : Pressures) {
1201 for (
auto PSetIter =
MRI.getPressureSets(
Reg); PSetIter.isValid();
1203 dbgs() << *PSetIter <<
' ';
1208 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1210 auto PSetIter =
MRI.getPressureSets(
Reg);
1211 unsigned Weight = PSetIter.getWeight();
1212 for (; PSetIter.isValid(); ++PSetIter)
1213 Pressure[*PSetIter] += Weight;
1216 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1218 auto PSetIter =
MRI.getPressureSets(
Reg);
1219 unsigned Weight = PSetIter.getWeight();
1220 for (; PSetIter.isValid(); ++PSetIter) {
1221 auto &
P = Pressure[*PSetIter];
1223 "register pressure must be greater than or equal weight");
1230 return Reg.isPhysical() &&
TRI->isFixedRegister(MF,
Reg.asMCReg());
1234 return Reg.isVirtual() &&
MRI.getVRegDef(
Reg)->getParent() == OrigMBB;
1245 void computeLiveIn() {
1247 for (
auto &
MI : *OrigMBB) {
1248 if (
MI.isDebugInstr())
1256 if (isFixedRegister(
Reg))
1258 if (isDefinedInThisLoop(
Reg))
1264 for (
auto LiveIn : Used)
1265 increaseRegisterPressure(InitSetPressure, LiveIn);
1270 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1279 if (isFixedRegister(
Reg))
1284 for (
auto Reg : FixedRegs) {
1286 const int *Sets =
TRI->getRegUnitPressureSets(
Reg);
1287 for (; *Sets != -1; Sets++) {
1288 dbgs() <<
TRI->getRegPressureSetName(*Sets) <<
", ";
1294 for (
auto Reg : FixedRegs) {
1297 auto PSetIter =
MRI.getPressureSets(
Reg);
1298 unsigned Weight = PSetIter.getWeight();
1299 for (; PSetIter.isValid(); ++PSetIter) {
1300 unsigned &Limit = PressureSetLimit[*PSetIter];
1301 assert(Limit >= Weight &&
1302 "register pressure limit must be greater than or equal weight");
1305 <<
" (decreased by " << Weight <<
")\n");
1321 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1322 Instr2StageTy &Stages)
const {
1328 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1329 if (isDefinedInThisLoop(
Reg))
1335 UpdateTargetRegs(
Reg);
1337 for (
auto &
Use : ROMap.
find(
MI)->getSecond().Uses)
1338 UpdateTargetRegs(
Use.RegUnit);
1343 return Stages[
MI] +
MI->isPHI();
1348 for (
auto Use : ROMap.
find(
MI)->getSecond().Uses) {
1352 auto Ite = LastUseMI.
find(
Reg);
1353 if (Ite == LastUseMI.
end()) {
1354 LastUseMI[
Reg] =
MI;
1358 if (InstrScore(Orig) < InstrScore(New))
1359 LastUseMI[
Reg] = New;
1364 Instr2LastUsesTy LastUses;
1365 for (
auto &Entry : LastUseMI)
1366 LastUses[Entry.second].insert(Entry.first);
1382 std::vector<unsigned>
1383 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1384 Instr2StageTy &Stages,
1385 const unsigned StageCount)
const {
1392 auto CurSetPressure = InitSetPressure;
1393 auto MaxSetPressure = InitSetPressure;
1394 auto LastUses = computeLastUses(OrderedInsts, Stages);
1397 dbgs() <<
"Ordered instructions:\n";
1399 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1404 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1406 if (!
Reg.isValid() || isFixedRegister(
Reg))
1414 increaseRegisterPressure(CurSetPressure,
Reg);
1418 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1420 if (!
Reg.isValid() || isFixedRegister(
Reg))
1424 if (!RegSet.contains(
Reg))
1429 decreaseRegisterPressure(CurSetPressure,
Reg);
1433 for (
unsigned I = 0;
I < StageCount;
I++) {
1435 const auto Stage = Stages[
MI];
1439 const unsigned Iter =
I - Stage;
1441 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1442 InsertReg(LiveRegSets[Iter],
Def.RegUnit);
1444 for (
auto LastUse : LastUses[
MI]) {
1447 EraseReg(LiveRegSets[Iter - 1], LastUse);
1449 EraseReg(LiveRegSets[Iter], LastUse);
1453 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1454 MaxSetPressure[PSet] =
1455 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1458 dbgs() <<
"CurSetPressure=";
1459 dumpRegisterPressures(CurSetPressure);
1460 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1466 return MaxSetPressure;
1472 : OrigMBB(OrigMBB), MF(MF),
MRI(MF.getRegInfo()),
1473 TRI(MF.getSubtarget().getRegisterInfo()),
1474 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1475 PressureSetLimit(PSetNum, 0) {}
1481 if (
MI.isDebugInstr())
1483 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1487 computePressureSetLimit(RCI);
1493 const unsigned MaxStage)
const {
1495 "the percentage of the margin must be between 0 to 100");
1497 OrderedInstsTy OrderedInsts;
1498 Instr2StageTy Stages;
1500 const auto MaxSetPressure =
1501 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1504 dbgs() <<
"Dump MaxSetPressure:\n";
1505 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1506 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1511 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1512 unsigned Limit = PressureSetLimit[PSet];
1515 <<
" Margin=" << Margin <<
"\n");
1516 if (Limit < MaxSetPressure[PSet] + Margin) {
1519 <<
"Rejected the schedule because of too high register pressure\n");
1535unsigned SwingSchedulerDAG::calculateResMII() {
1538 return RM.calculateResMII();
1547unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1548 unsigned RecMII = 0;
1550 for (
NodeSet &Nodes : NodeSets) {
1554 unsigned Delay = Nodes.getLatency();
1555 unsigned Distance = 1;
1558 unsigned CurMII = (Delay + Distance - 1) / Distance;
1559 Nodes.setRecMII(CurMII);
1560 if (CurMII > RecMII)
1571 for (
SUnit &SU : SUnits) {
1574 DepsAdded.
push_back(std::make_pair(&SU, Pred));
1576 for (std::pair<SUnit *, SDep> &
P : DepsAdded) {
1580 SUnit *TargetSU =
D.getSUnit();
1581 unsigned Reg =
D.getReg();
1582 unsigned Lat =
D.getLatency();
1591void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1595 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1598 for (
auto &SI : SUnits[i].Succs) {
1602 int N =
SI.getSUnit()->NodeNum;
1604 auto Dep = OutputDeps.
find(BackEdge);
1605 if (Dep != OutputDeps.
end()) {
1606 BackEdge = Dep->second;
1607 OutputDeps.
erase(Dep);
1609 OutputDeps[
N] = BackEdge;
1613 if (
SI.getSUnit()->isBoundaryNode() ||
SI.isArtificial() ||
1614 (
SI.getKind() ==
SDep::Anti && !
SI.getSUnit()->getInstr()->isPHI()))
1616 int N =
SI.getSUnit()->NodeNum;
1618 AdjK[i].push_back(
N);
1624 for (
auto &PI : SUnits[i].Preds) {
1625 if (!SUnits[i].getInstr()->mayStore() ||
1628 if (PI.getKind() ==
SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1629 int N = PI.getSUnit()->NodeNum;
1631 AdjK[i].push_back(
N);
1638 for (
auto &OD : OutputDeps)
1639 if (!
Added.test(OD.second)) {
1640 AdjK[OD.first].push_back(OD.second);
1641 Added.set(OD.second);
1647bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1654 for (
auto W : AdjK[V]) {
1655 if (NumPaths > MaxPaths)
1665 }
else if (!Blocked.test(W)) {
1666 if (circuit(W, S, NodeSets,
1667 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
1675 for (
auto W : AdjK[V]) {
1686void SwingSchedulerDAG::Circuits::unblock(
int U) {
1689 while (!BU.
empty()) {
1691 assert(SI != BU.
end() &&
"Invalid B set.");
1694 if (Blocked.test(
W->NodeNum))
1695 unblock(
W->NodeNum);
1701void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1706 Circuits Cir(
SUnits, Topo);
1708 Cir.createAdjacencyStructure(
this);
1709 for (
int i = 0, e =
SUnits.size(); i != e; ++i) {
1711 Cir.circuit(i, i, NodeSets);
1747 for (
auto &Dep : SU.
Preds) {
1748 SUnit *TmpSU = Dep.getSUnit();
1760 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
1768 for (
auto &Dep : PHISUs[
Index]->Succs) {
1772 SUnit *TmpSU = Dep.getSUnit();
1782 if (UseSUs.
size() == 0)
1787 for (
auto *
I : UseSUs) {
1788 for (
auto *Src : SrcSUs) {
1802 if (
D.isArtificial() ||
D.getSUnit()->isBoundaryNode())
1813void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1814 ScheduleInfo.resize(
SUnits.size());
1817 for (
int I : Topo) {
1825 for (
int I : Topo) {
1827 int zeroLatencyDepth = 0;
1831 if (
P.getLatency() == 0)
1836 asap = std::max(asap, (
int)(
getASAP(
pred) +
P.getLatency() -
1839 maxASAP = std::max(maxASAP, asap);
1840 ScheduleInfo[
I].ASAP = asap;
1841 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
1847 int zeroLatencyHeight = 0;
1862 ScheduleInfo[
I].ALAP = alap;
1863 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
1868 I.computeNodeSetInfo(
this);
1871 for (
unsigned i = 0; i <
SUnits.size(); i++) {
1872 dbgs() <<
"\tNode " << i <<
":\n";
1893 if (S && S->count(Pred.
getSUnit()) == 0)
1904 if (S && S->count(Succ.
getSUnit()) == 0)
1910 return !Preds.
empty();
1922 if (S && S->count(Succ.
getSUnit()) == 0)
1932 if (S && S->count(Pred.
getSUnit()) == 0)
1938 return !Succs.
empty();
1953 if (!Visited.
insert(Cur).second)
1954 return Path.contains(Cur);
1955 bool FoundPath =
false;
1956 for (
auto &SI : Cur->
Succs)
1959 computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1960 for (
auto &PI : Cur->
Preds)
1963 computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1978 for (
SUnit *SU : NS) {
1984 if (Reg.isVirtual())
1986 else if (
MRI.isAllocatable(Reg))
1991 for (
SUnit *SU : NS)
1995 if (Reg.isVirtual()) {
1996 if (!
Uses.count(Reg))
1999 }
else if (
MRI.isAllocatable(Reg)) {
2001 if (!
Uses.count(Unit))
2011void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2012 for (
auto &NS : NodeSets) {
2018 RecRPTracker.init(&
MF, &RegClassInfo, &LIS,
BB,
BB->
end(),
false,
true);
2020 RecRPTracker.closeBottom();
2022 std::vector<SUnit *>
SUnits(NS.begin(), NS.end());
2024 return A->NodeNum >
B->NodeNum;
2027 for (
auto &SU :
SUnits) {
2033 RecRPTracker.setPos(std::next(CurInstI));
2037 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2042 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2045 NS.setExceedPressure(SU);
2048 RecRPTracker.recede();
2055void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2056 unsigned Colocate = 0;
2057 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2062 for (
int j = i + 1;
j <
e; ++
j) {
2083void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2088 for (
auto &NS : NodeSets) {
2089 if (NS.getRecMII() > 2)
2091 if (NS.getMaxDepth() > MII)
2100void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2128 NodesAdded.
insert(
I.begin(),
I.end());
2137 addConnectedNodes(
I, NewSet, NodesAdded);
2138 if (!NewSet.
empty())
2139 NodeSets.push_back(NewSet);
2146 addConnectedNodes(
I, NewSet, NodesAdded);
2147 if (!NewSet.
empty())
2148 NodeSets.push_back(NewSet);
2153 if (NodesAdded.
count(&SU) == 0) {
2155 addConnectedNodes(&SU, NewSet, NodesAdded);
2156 if (!NewSet.
empty())
2157 NodeSets.push_back(NewSet);
2163void SwingSchedulerDAG::addConnectedNodes(
SUnit *SU,
NodeSet &NewSet,
2167 for (
auto &SI : SU->
Succs) {
2169 if (!
SI.isArtificial() && !
Successor->isBoundaryNode() &&
2171 addConnectedNodes(
Successor, NewSet, NodesAdded);
2173 for (
auto &PI : SU->
Preds) {
2174 SUnit *Predecessor = PI.getSUnit();
2175 if (!PI.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2176 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2185 for (
SUnit *SU : Set1) {
2186 if (Set2.
count(SU) != 0)
2189 return !Result.empty();
2193void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2202 for (
SUnit *SU : *J)
2214void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2218 J->remove_if([&](
SUnit *SUJ) {
return I->count(SUJ); });
2233void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2237 for (
auto &Nodes : NodeSets) {
2242 R.insert(
N.begin(),
N.end());
2246 R.insert(
N.begin(),
N.end());
2254 }
else if (NodeSets.size() == 1) {
2255 for (
const auto &
N : Nodes)
2256 if (
N->Succs.size() == 0)
2262 SUnit *maxASAP =
nullptr;
2263 for (
SUnit *SU : Nodes) {
2273 while (!
R.empty()) {
2274 if (Order == TopDown) {
2278 while (!
R.empty()) {
2279 SUnit *maxHeight =
nullptr;
2292 NodeOrder.insert(maxHeight);
2294 R.remove(maxHeight);
2295 for (
const auto &
I : maxHeight->
Succs) {
2296 if (Nodes.count(
I.getSUnit()) == 0)
2298 if (NodeOrder.contains(
I.getSUnit()))
2302 R.insert(
I.getSUnit());
2305 for (
const auto &
I : maxHeight->
Preds) {
2308 if (Nodes.count(
I.getSUnit()) == 0)
2310 if (NodeOrder.contains(
I.getSUnit()))
2312 R.insert(
I.getSUnit());
2318 if (
pred_L(NodeOrder,
N, &Nodes))
2319 R.insert(
N.begin(),
N.end());
2324 while (!
R.empty()) {
2325 SUnit *maxDepth =
nullptr;
2337 NodeOrder.insert(maxDepth);
2340 if (Nodes.isExceedSU(maxDepth)) {
2343 R.insert(Nodes.getNode(0));
2346 for (
const auto &
I : maxDepth->
Preds) {
2347 if (Nodes.count(
I.getSUnit()) == 0)
2349 if (NodeOrder.contains(
I.getSUnit()))
2351 R.insert(
I.getSUnit());
2354 for (
const auto &
I : maxDepth->
Succs) {
2357 if (Nodes.count(
I.getSUnit()) == 0)
2359 if (NodeOrder.contains(
I.getSUnit()))
2361 R.insert(
I.getSUnit());
2367 if (
succ_L(NodeOrder,
N, &Nodes))
2368 R.insert(
N.begin(),
N.end());
2375 dbgs() <<
"Node order: ";
2376 for (
SUnit *
I : NodeOrder)
2377 dbgs() <<
" " <<
I->NodeNum <<
" ";
2384bool SwingSchedulerDAG::schedulePipeline(
SMSchedule &Schedule) {
2386 if (NodeOrder.empty()){
2391 bool scheduleFound =
false;
2392 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2396 HRPDetector->init(RegClassInfo);
2399 for (
unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2411 int EarlyStart = INT_MIN;
2412 int LateStart = INT_MAX;
2415 int SchedEnd = INT_MAX;
2416 int SchedStart = INT_MIN;
2417 Schedule.
computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2426 dbgs() <<
format(
"\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2427 LateStart, SchedEnd, SchedStart);
2430 if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2431 SchedStart > LateStart)
2432 scheduleFound =
false;
2433 else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2434 SchedEnd = std::min(SchedEnd, EarlyStart + (
int)II - 1);
2435 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
2436 }
else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2437 SchedStart = std::max(SchedStart, LateStart - (
int)II + 1);
2438 scheduleFound = Schedule.
insert(SU, LateStart, SchedStart, II);
2439 }
else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2441 std::min(SchedEnd, std::min(LateStart, EarlyStart + (
int)II - 1));
2446 scheduleFound = Schedule.
insert(SU, SchedEnd, EarlyStart, II);
2448 scheduleFound = Schedule.
insert(SU, EarlyStart, SchedEnd, II);
2451 scheduleFound = Schedule.
insert(SU, FirstCycle +
getASAP(SU),
2452 FirstCycle +
getASAP(SU) + II - 1, II);
2459 scheduleFound =
false;
2463 dbgs() <<
"\tCan't schedule\n";
2465 }
while (++NI != NE && scheduleFound);
2487 if (scheduleFound) {
2493 if (scheduleFound) {
2495 Pass.ORE->emit([&]() {
2498 <<
"Schedule found with Initiation Interval: "
2500 <<
", MaxStageCount: "
2511bool SwingSchedulerDAG::computeDelta(
MachineInstr &
MI,
unsigned &Delta) {
2515 bool OffsetIsScalable;
2520 if (OffsetIsScalable)
2523 if (!BaseOp->
isReg())
2531 if (BaseDef && BaseDef->
isPHI()) {
2555 unsigned &OffsetPos,
2561 unsigned BasePosLd, OffsetPosLd;
2564 Register BaseReg =
MI->getOperand(BasePosLd).getReg();
2569 if (!Phi || !
Phi->isPHI())
2578 if (!PrevDef || PrevDef ==
MI)
2584 unsigned BasePos1 = 0, OffsetPos1 = 0;
2590 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
2600 BasePos = BasePosLd;
2601 OffsetPos = OffsetPosLd;
2613 InstrChanges.find(SU);
2614 if (It != InstrChanges.end()) {
2615 std::pair<unsigned, int64_t> RegAndOffset = It->second;
2616 unsigned BasePos, OffsetPos;
2619 Register BaseReg =
MI->getOperand(BasePos).getReg();
2625 if (BaseStageNum < DefStageNum) {
2627 int OffsetDiff = DefStageNum - BaseStageNum;
2628 if (DefCycleNum < BaseCycleNum) {
2634 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2649 while (Def->isPHI()) {
2650 if (!Visited.
insert(Def).second)
2652 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2653 if (Def->getOperand(i + 1).getMBB() ==
BB) {
2680 assert(SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
2694 unsigned DeltaS, DeltaD;
2695 if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2699 int64_t OffsetS, OffsetD;
2700 bool OffsetSIsScalable, OffsetDIsScalable;
2708 assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2709 "Expected offsets to be byte offsets");
2713 if (!DefS || !DefD || !DefS->
isPHI() || !DefD->
isPHI())
2716 unsigned InitValS = 0;
2717 unsigned LoopValS = 0;
2718 unsigned InitValD = 0;
2719 unsigned LoopValD = 0;
2743 if (DeltaS != DeltaD || DeltaS < AccessSizeS.
getValue() ||
2747 return (OffsetS + (int64_t)AccessSizeS.
getValue() <
2748 OffsetD + (int64_t)AccessSizeD.
getValue());
2751void SwingSchedulerDAG::postProcessDAG() {
2752 for (
auto &M : Mutations)
2762 bool forward =
true;
2764 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
2765 << EndCycle <<
" II: " << II <<
"\n";
2767 if (StartCycle > EndCycle)
2771 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2772 for (
int curCycle = StartCycle; curCycle != termCycle;
2773 forward ? ++curCycle : --curCycle) {
2778 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
2783 ProcItinResources.reserveResources(*SU, curCycle);
2784 ScheduledInstrs[curCycle].push_back(SU);
2785 InstrToCycle.insert(std::make_pair(SU, curCycle));
2786 if (curCycle > LastCycle)
2787 LastCycle = curCycle;
2788 if (curCycle < FirstCycle)
2789 FirstCycle = curCycle;
2793 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
2805 int EarlyCycle = INT_MAX;
2806 while (!Worklist.
empty()) {
2809 if (Visited.
count(PrevSU))
2811 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2812 if (it == InstrToCycle.end())
2814 EarlyCycle = std::min(EarlyCycle, it->second);
2815 for (
const auto &PI : PrevSU->
Preds)
2828 int LateCycle = INT_MIN;
2829 while (!Worklist.
empty()) {
2834 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2835 if (it == InstrToCycle.end())
2837 LateCycle = std::max(LateCycle, it->second);
2838 for (
const auto &SI : SuccSU->
Succs)
2850 for (
auto &
P : SU->
Preds)
2851 if (DAG->
isBackedge(SU,
P) &&
P.getSUnit()->getInstr()->isPHI())
2852 for (
auto &S :
P.getSUnit()->Succs)
2854 return P.getSUnit();
2861 int *MinEnd,
int *MaxStart,
int II,
2866 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
2872 for (
unsigned i = 0, e = (
unsigned)SU->
Preds.size(); i != e; ++i) {
2878 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2881 *MinEnd = std::min(*MinEnd,
End);
2886 *MinLateStart = std::min(*MinLateStart, LateStart);
2894 *MinLateStart = std::min(*MinLateStart, cycle);
2896 for (
unsigned i = 0, e = (
unsigned)SU->
Succs.size(); i != e; ++i) {
2897 if (SU->
Succs[i].getSUnit() ==
I) {
2902 *MinLateStart = std::min(*MinLateStart, LateStart);
2905 *MaxStart = std::max(*MaxStart, Start);
2910 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2922 std::deque<SUnit *> &Insts)
const {
2924 bool OrderBeforeUse =
false;
2925 bool OrderAfterDef =
false;
2926 bool OrderBeforeDef =
false;
2927 unsigned MoveDef = 0;
2928 unsigned MoveUse = 0;
2932 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
2935 if (!MO.isReg() || !MO.getReg().isVirtual())
2939 unsigned BasePos, OffsetPos;
2940 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
2941 if (
MI->getOperand(BasePos).getReg() == Reg)
2945 std::tie(Reads,
Writes) =
2946 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2948 OrderBeforeUse =
true;
2953 OrderAfterDef =
true;
2957 OrderBeforeUse =
true;
2961 OrderAfterDef =
true;
2965 OrderBeforeUse =
true;
2969 OrderAfterDef =
true;
2974 OrderBeforeUse =
true;
2980 OrderBeforeDef =
true;
2987 for (
auto &S : SU->
Succs) {
2991 OrderBeforeUse =
true;
2999 OrderBeforeUse =
true;
3000 if ((MoveUse == 0) || (Pos < MoveUse))
3004 for (
auto &
P : SU->
Preds) {
3005 if (
P.getSUnit() != *
I)
3008 OrderAfterDef =
true;
3015 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3016 OrderBeforeUse =
false;
3021 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3025 if (OrderBeforeUse && OrderAfterDef) {
3026 SUnit *UseSU = Insts.at(MoveUse);
3027 SUnit *DefSU = Insts.at(MoveDef);
3028 if (MoveUse > MoveDef) {
3029 Insts.erase(Insts.begin() + MoveUse);
3030 Insts.erase(Insts.begin() + MoveDef);
3032 Insts.erase(Insts.begin() + MoveDef);
3033 Insts.erase(Insts.begin() + MoveUse);
3043 Insts.push_front(SU);
3045 Insts.push_back(SU);
3053 assert(Phi.isPHI() &&
"Expecting a Phi.");
3058 unsigned InitVal = 0;
3059 unsigned LoopVal = 0;
3060 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3068 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3086 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3092 if (DMO.getReg() == LoopReg)
3104 for (
auto &SU : SSD->
SUnits)
3108 while (!Worklist.
empty()) {
3110 if (DoNotPipeline.
count(SU))
3113 DoNotPipeline.
insert(SU);
3114 for (
auto &Dep : SU->
Preds)
3117 for (
auto &Dep : SU->
Succs)
3121 return DoNotPipeline;
3130 int NewLastCycle = INT_MIN;
3135 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3141 for (
auto &Dep : SU.
Preds)
3142 NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
3144 int OldCycle = InstrToCycle[&SU];
3145 if (OldCycle != NewCycle) {
3146 InstrToCycle[&SU] = NewCycle;
3151 <<
") is not pipelined; moving from cycle " << OldCycle
3152 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3154 NewLastCycle = std::max(NewLastCycle, NewCycle);
3156 LastCycle = NewLastCycle;
3173 int CycleDef = InstrToCycle[&SU];
3174 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3175 for (
auto &SI : SU.
Succs)
3176 if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
3180 if (InstrToCycle[SI.getSUnit()] <= CycleDef)
3197void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3200 typedef std::pair<SUnit *, unsigned> UnitIndex;
3201 std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(
nullptr, 0));
3203 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3204 Indices.push_back(std::make_pair(NodeOrder[i], i));
3206 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3207 return std::get<0>(i1) < std::get<0>(i2);
3220 for (
unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3221 SUnit *SU = NodeOrder[i];
3224 bool PredBefore =
false;
3225 bool SuccBefore =
false;
3234 unsigned PredIndex = std::get<1>(
3250 unsigned SuccIndex = std::get<1>(
3263 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3268 NumNodeOrderIssues++;
3272 <<
" are scheduled before node " << SU->
NodeNum
3279 dbgs() <<
"Invalid node order found!\n";
3290 unsigned OverlapReg = 0;
3291 unsigned NewBaseReg = 0;
3292 for (
SUnit *SU : Instrs) {
3294 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3302 InstrChanges.find(SU);
3303 if (It != InstrChanges.end()) {
3304 unsigned BasePos, OffsetPos;
3310 MI->getOperand(OffsetPos).getImm() - It->second.second;
3323 unsigned TiedUseIdx = 0;
3324 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3326 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3328 NewBaseReg =
MI->getOperand(i).getReg();
3337 const std::deque<SUnit *> &Instrs)
const {
3338 std::deque<SUnit *> NewOrderPhi;
3339 for (
SUnit *SU : Instrs) {
3341 NewOrderPhi.push_back(SU);
3343 std::deque<SUnit *> NewOrderI;
3344 for (
SUnit *SU : Instrs) {
3360 std::deque<SUnit *> &cycleInstrs =
3361 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3363 ScheduledInstrs[cycle].push_front(SU);
3369 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3370 ScheduledInstrs.erase(cycle);
3380 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3389 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3390 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3391 for (
const auto &
I : Nodes)
3392 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3396#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3403 for (
SUnit *CI : cycleInstrs->second) {
3405 os <<
"(" << CI->
NodeNum <<
") ";
3416void ResourceManager::dumpMRT()
const {
3420 std::stringstream SS;
3422 SS << std::setw(4) <<
"Slot";
3424 SS << std::setw(3) <<
I;
3425 SS << std::setw(7) <<
"#Mops"
3427 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3428 SS << std::setw(4) << Slot;
3430 SS << std::setw(3) << MRT[Slot][
I];
3431 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3440 unsigned ProcResourceID = 0;
3445 "Too many kinds of resources, unsupported");
3451 if (
Desc.SubUnitsIdxBegin)
3453 Masks[
I] = 1ULL << ProcResourceID;
3459 if (!
Desc.SubUnitsIdxBegin)
3461 Masks[
I] = 1ULL << ProcResourceID;
3462 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3463 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3468 dbgs() <<
"ProcResourceDesc:\n";
3471 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3472 ProcResource->
Name,
I, Masks[
I],
3475 dbgs() <<
" -----------------\n";
3483 dbgs() <<
"canReserveResources:\n";
3486 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3492 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3498 reserveResources(SCDesc,
Cycle);
3499 bool Result = !isOverbooked();
3500 unreserveResources(SCDesc,
Cycle);
3509 dbgs() <<
"reserveResources:\n";
3512 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3518 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
3524 reserveResources(SCDesc,
Cycle);
3529 dbgs() <<
"reserveResources: done!\n\n";
3540 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3543 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3552 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
3555 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
3558bool ResourceManager::isOverbooked()
const {
3560 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
3563 if (MRT[Slot][
I] >
Desc->NumUnits)
3566 if (NumScheduledMops[Slot] > IssueWidth)
3572int ResourceManager::calculateResMIIDFA()
const {
3577 FuncUnitSorter FUS = FuncUnitSorter(*ST);
3579 FUS.calcCriticalResources(*SU.
getInstr());
3590 while (!FuncUnitOrder.empty()) {
3592 FuncUnitOrder.pop();
3599 unsigned ReservedCycles = 0;
3600 auto *RI = Resources.
begin();
3601 auto *RE = Resources.
end();
3603 dbgs() <<
"Trying to reserve resource for " << NumCycles
3604 <<
" cycles for \n";
3607 for (
unsigned C = 0;
C < NumCycles; ++
C)
3609 if ((*RI)->canReserveResources(*
MI)) {
3610 (*RI)->reserveResources(*
MI);
3617 <<
", NumCycles:" << NumCycles <<
"\n");
3619 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
3621 <<
"NewResource created to reserve resources"
3624 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
3625 NewResource->reserveResources(*
MI);
3626 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3630 int Resmii = Resources.
size();
3637 return calculateResMIIDFA();
3656 <<
" WriteProcRes: ";
3667 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
3670 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
3675 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3678 dbgs() <<
"#Mops: " << NumMops <<
", "
3679 <<
"IssueWidth: " << IssueWidth <<
", "
3680 <<
"Cycles: " << Result <<
"\n";
3685 std::stringstream SS;
3686 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
3687 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
3694 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
3697 std::stringstream SS;
3698 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
3699 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
3700 << std::setw(10) << Cycles <<
"\n";
3704 if (Cycles > Result)
3711 InitiationInterval = II;
3712 DFAResources.clear();
3713 DFAResources.resize(II);
3714 for (
auto &
I : DFAResources)
3715 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3718 NumScheduledMops.
clear();
3719 NumScheduledMops.
resize(II);
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...
This file defines the DenseMap class.
SmallVector< uint32_t, 0 > Writes
Rewrite Partial Register Uses
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Modulo Software Pipelining
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static unsigned getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static unsigned getSize(unsigned Kind)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
bool erase(const KeyT &Val)
Implements a dense probed hash-table based set.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
A possibly irreducible generalization of a Loop.
Itinerary data supplied by a subtarget to be used by a target.
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
BlockT * getHeader() const
Represents a single loop in the control flow graph.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Generic base class for all target subtargets.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
Tracking metadata reference owned by Metadata.
StringRef getString() const
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
const MachineOperand & getOperand(unsigned i) const
iterator_range< filtered_mop_iterator > all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
defusechain_iterator - This class provides iterator support for machine operands in the function that...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
use_instr_iterator use_instr_begin(Register RegNo) const
static use_instr_iterator use_instr_end()
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
Pass interface - Implemented by all 'passes'.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Track the current register pressure at some position in the instruction stream, and remember the high...
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Kind
These are the different kinds of scheduling dependencies.
@ Output
A register output-dependence (aka WAW).
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
A ScheduleDAG for scheduling lists of MachineInstr.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
MachineFunction & MF
Machine function.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Implements a dense probed hash-table based set with some number of buckets stored inline.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction may be scheduled.
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should be used.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static Type * getVoidTy(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< PhiNode * > Phi
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void stable_sort(R &&Range)
int popcount(T Value) noexcept
Count the number of set bits in a value.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
cl::opt< bool > SwpEnableCopyToPhi
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Description of the encoding of one expression Op.
These values represent a non-pipelined step in the execution of an instruction.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
static constexpr LaneBitmask getNone()
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
unsigned getNumProcResourceKinds() const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineInstr * LoopInductionVar
SmallVector< MachineOperand, 4 > BrCond
MachineInstr * LoopCompare
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
Store the effects of a change in pressure on things that MI scheduler cares about.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.