#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "pipeliner"

STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");

static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
                               cl::ZeroOrMore,
                               cl::desc("Enable Software Pipelining"));

static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
                              cl::desc("Size limit for the MII."),
                              cl::Hidden, cl::init(27));

static cl::opt<int> SwpMaxStages("pipeliner-max-stages",
                                 cl::desc("Maximum stages allowed in the generated scheduled."),
                                 cl::Hidden, cl::init(3));

static cl::opt<bool> SwpPruneDeps("pipeliner-prune-deps",
                                  cl::desc("Prune dependences between unrelated Phi nodes."),
                                  cl::Hidden, cl::init(true));

static cl::opt<bool> SwpPruneLoopCarried("pipeliner-prune-loop-carried",
                                         cl::desc("Prune loop carried order dependences."),
                                         cl::Hidden, cl::init(true));

static cl::opt<bool> EmitTestAnnotations(
    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
    cl::desc("Instead of emitting the pipelined code, annotate instructions "
             "with the generated schedule for feeding into the "
             "-modulo-schedule-test pass"));

static cl::opt<bool> ExperimentalCodeGen(
    "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
    cl::desc("Use the experimental peeling code generator for software pipelining"));

cl::opt<bool> SwpEnableCopyToPhi(/* ... */ cl::desc("Enable CopyToPhi DAG Mutation"));

unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
                      "Modulo Software Pipelining", false, false)
if (skipFunction(mf.getFunction()))
  ...

if (mf.getFunction().getAttributes().hasAttribute(/* ... */))
  ...

if (!mf.getSubtarget().enableMachinePipeliner())
  ...

if (mf.getSubtarget().useDFAforSMS() &&
    (!mf.getSubtarget().getInstrItineraryData() ||
     mf.getSubtarget().getInstrItineraryData()->isEmpty()))
  ...

MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
TII = MF->getSubtarget().getInstrInfo();
RegClassInfo.runOnMachineFunction(*MF);
bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
  bool Changed = false;
  for (auto &InnerLoop : L)
    Changed |= scheduleLoop(*InnerLoop);
  ...
  setPragmaPipelineOptions(L);
  if (!canPipelineLoop(L)) {
    ... L.getStartLoc(), L.getHeader())
        << "Failed to pipeline loop";
    ...
  }
  ...
  Changed = swingModuloScheduler(L);
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  ...
  if (LoopID == nullptr)
    ...
    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
      ... "Pipeline initiation interval hint metadata should have two operands.");
      ... mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
      ...
    } else if (S->getString() == "llvm.loop.pipeline.disable") {
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
  ... << "Not a single basic block: "
  ...
  ... << "Disabled by Pragma.";
  ...
  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
  ... << "The branch can't be understood";
  ...
  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
  ... << "The loop structure is not supported";
  ...
  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
  ... << "No loop preheader found";
SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
...
for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
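The metadata names read by setPragmaPipelineOptions() above ("llvm.loop.pipeline.initiationinterval" and "llvm.loop.pipeline.disable") are normally attached to a loop by Clang's loop pragmas. A minimal, hedged example of C++ source that is expected to produce them (the pragma spellings are Clang extensions; this is an illustration, not part of the pass):

// Hedged example: with Clang, these pragmas attach the pipeline metadata that
// setPragmaPipelineOptions() looks for on the loop's !llvm.loop node.
void scale(int *a, int n) {
#pragma clang loop pipeline_initiation_interval(10)
  for (int i = 0; i < n; ++i)
    a[i] *= 2;
}

void copyNoPipeline(int *dst, const int *src, int n) {
#pragma clang loop pipeline(disable)
  for (int i = 0; i < n; ++i)
    dst[i] = src[i];
}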
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
  ...
  return SMS.hasNewSchedule();
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
  if (II_setByPragma > 0)
    MII = II_setByPragma;
  ...

void SwingSchedulerDAG::setMAX_II() {
  if (II_setByPragma > 0)
    MAX_II = II_setByPragma;
  ...
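setMII() and setMAX_II() only fix the bounds of the search: schedulePipeline() later tries each candidate initiation interval from MII up to MAX_II and keeps the first one that admits a schedule. A hedged sketch of that outer loop, with a hypothetical tryScheduleAtII() helper standing in for the per-node placement:

bool tryScheduleAtII(unsigned II); // hypothetical helper, not part of the pass

bool findPipelineSchedule(unsigned MII, unsigned MAX_II) {
  for (unsigned II = MII; II <= MAX_II; ++II)
    if (tryScheduleAtII(II))  // every node fits in a window of II cycles
      return true;            // the smallest feasible II wins
  return false;
}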
addLoopCarriedDependences(AA);
updatePhiDependences();
...
findCircuits(NodeSets);
...
unsigned ResMII = calculateResMII();
unsigned RecMII = calculateRecMII(NodeSets);
...
setMII(ResMII, RecMII);
...
... << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
...
Pass.ORE->emit([&]() {
  ...
  << "Invalid Minimal Initiation Interval: 0";
...
... << ", we don't pipleline large loops\n");
NumFailLargeMaxMII++;
Pass.ORE->emit([&]() {
  ...
  << "Minimal Initiation Interval too large: "
  << ore::NV("MII", (int)MII) << " > "
  ...
  << "Refer to -pipeliner-max-mii.";
...
computeNodeFunctions(NodeSets);
...
registerPressureFilter(NodeSets);
...
colocateNodeSets(NodeSets);
...
checkNodeSets(NodeSets);
...
for (auto &I : NodeSets) {
  dbgs() << " Rec NodeSet ";
...
groupRemainingNodes(NodeSets);
...
removeDuplicateNodes(NodeSets);
...
for (auto &I : NodeSets) {
  dbgs() << " NodeSet ";
...
computeNodeOrder(NodeSets);
...
checkValidNodeOrder(Circuits);
...
Scheduled = schedulePipeline(Schedule);
...
Pass.ORE->emit([&]() {
  ...
  << "Unable to find schedule";
...
if (numStages == 0) {
  ...
  Pass.ORE->emit([&]() {
    ...
    << "No need to pipeline - no overlapped iterations in schedule.";
...
... << " : too many stages, abort\n");
NumFailLargeMaxStage++;
Pass.ORE->emit([&]() {
  ...
  << "Too many stages in schedule: "
  << ore::NV("numStages", (int)numStages) << " > "
  ...
  << ". Refer to -pipeliner-max-stages.";
...
Pass.ORE->emit([&]() {
  ...
  << "Pipelined succesfully!";
std::vector<MachineInstr *> OrderedInsts;
...
OrderedInsts.push_back(SU->getInstr());
Cycles[SU->getInstr()] = Cycle;
...
for (auto &KV : NewMIs) {
  Cycles[KV.first] = Cycles[KV.second];
  Stages[KV.first] = Stages[KV.second];
  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
}
...
... "Cannot serialize a schedule with InstrChanges!");
...
for (auto &KV : NewMIs)
  ...
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
                       unsigned &InitVal, unsigned &LoopVal) {
  ...
  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
Worklist.push_back(SUa);
while (!Worklist.empty()) {
  ...
  if (Visited.count(SuccSU))
    ...
  Worklist.push_back(SuccSU);
return MI.isCall() || MI.mayRaiseFPException() ||
       MI.hasUnmodeledSideEffects() ||
       (MI.hasOrderedMemoryRef() &&
        (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
if (!MI->hasOneMemOperand())
  ...
for (const Value *V : Objs) {
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
  ...
      PendingLoads.clear();
    else if (MI.mayLoad()) {
      ...
      for (auto V : Objs) {
        ...
    } else if (MI.mayStore()) {
      ...
      for (auto V : Objs) {
        ... PendingLoads.find(V);
        if (I == PendingLoads.end())
          ...
        for (auto Load : I->second) {
          ...
          int64_t Offset1, Offset2;
          bool Offset1IsScalable, Offset2IsScalable;
          ... Offset1IsScalable, TRI) &&
          ... Offset2IsScalable, TRI)) {
            ... Offset1IsScalable == Offset2IsScalable &&
                (int)Offset1 < (int)Offset2) {
              ... "What happened to the chain edge?");
void SwingSchedulerDAG::updatePhiDependences() {
  ...
  unsigned HasPhiUse = 0;
  unsigned HasPhiDef = 0;
  ... MOE = MI->operands_end();
  ...
      if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
        ...
    } else if (MOI->isUse()) {
      ...
      if (DefMI == nullptr)
        ...
      ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
      ...
      if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
        ...
    for (auto &PI : I.Preds) {
      ...
      if (I.getInstr()->isPHI()) {
        ...
        RemoveDeps.push_back(PI);
    ...
    for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
      I.removePred(RemoveDeps[i]);
void SwingSchedulerDAG::changeDependences() {
  ...
  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
  int64_t NewOffset = 0;
  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
                             ...
  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
  ...
  for (const SDep &P : I.Preds)
    if (P.getSUnit() == DefSU)
      ...
  for (int i = 0, e = Deps.size(); i != e; i++) {
    ...
    I.removePred(Deps[i]);
  ...
  for (auto &P : LastSU->Preds)
    ...
  for (int i = 0, e = Deps.size(); i != e; i++) {
    ...
  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
struct FuncUnitSorter {
  ...
      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
  ...
    unsigned min = UINT_MAX;
    if (InstrItins && !InstrItins->isEmpty()) {
      ... InstrItins->endStage(SchedClass))) {
        ...
        if (numAlternatives < min) {
          min = numAlternatives;
          ...
      unsigned NumUnits = ProcResource->NumUnits;
      if (NumUnits < min) {
        ...
        F = PRE.ProcResourceIdx;
        ...
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
  ...
    unsigned SchedClass = MI.getDesc().getSchedClass();
    if (InstrItins && !InstrItins->isEmpty()) {
      ... InstrItins->endStage(SchedClass))) {
        ...
        Resources[FuncUnits]++;
        ...
      Resources[PRE.ProcResourceIdx]++;
      ...
    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
  ...
    unsigned MFUs1 = minFuncUnits(IS1, F1);
    unsigned MFUs2 = minFuncUnits(IS2, F2);
    ...
    return MFUs1 > MFUs2;
unsigned SwingSchedulerDAG::calculateResMII() {
  ...
  FUS.calcCriticalResources(*I);
  ...
  FuncUnitOrder.push(&*I);
  ...
  while (!FuncUnitOrder.empty()) {
    ...
    FuncUnitOrder.pop();
    ...
    unsigned ReservedCycles = 0;
    ...
      dbgs() << "Trying to reserve resource for " << NumCycles
             << " cycles for \n";
    ...
    for (unsigned C = 0; C < NumCycles; ++C)
      ...
      if ((*RI)->canReserveResources(*MI)) {
        (*RI)->reserveResources(*MI);
        ...
    ... << ", NumCycles:" << NumCycles << "\n");
    ...
    for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
      ... << "NewResource created to reserve resources"
      ...
      Resources.push_back(NewResource);
  ...
  int Resmii = Resources.size();
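calculateResMII() above reserves DFA resource tables cycle by cycle, most-constrained functional units first, and returns how many tables were needed. A simplified, table-free sketch of the same quantity (an approximation under assumed per-kind use/unit counts, not the DFA-based reservation shown above):

#include <algorithm>
#include <map>

// Hedged sketch: ResMII is roughly max over unit kinds of ceil(uses / units).
unsigned approxResMII(const std::map<unsigned, unsigned> &UsesPerUnitKind,
                      const std::map<unsigned, unsigned> &UnitsPerKind) {
  unsigned ResMII = 1;
  for (const auto &KV : UsesPerUnitKind) {
    auto It = UnitsPerKind.find(KV.first);
    unsigned Units = (It != UnitsPerKind.end() && It->second > 0) ? It->second : 1;
    unsigned Need = (KV.second + Units - 1) / Units; // ceiling division
    ResMII = std::max(ResMII, Need);
  }
  return ResMII;
}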
unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
  unsigned RecMII = 0;

  for (NodeSet &Nodes : NodeSets) {
    ...
    unsigned Delay = Nodes.getLatency();
    unsigned Distance = 1;
    ...
    unsigned CurMII = (Delay + Distance - 1) / Distance;
    Nodes.setRecMII(CurMII);
    if (CurMII > RecMII)
      ...
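The loop above computes, for each recurrence (node set), the ceiling of its total latency over its loop-carried distance; the recurrence-constrained MII is the maximum over all recurrences. A standalone sketch of the same computation over a hypothetical per-circuit summary type:

#include <algorithm>
#include <vector>

struct Recurrence { unsigned Delay; unsigned Distance; }; // hypothetical circuit summary

unsigned computeRecMII(const std::vector<Recurrence> &Circuits) {
  unsigned RecMII = 0;
  for (const Recurrence &R : Circuits) {
    unsigned CurMII = (R.Delay + R.Distance - 1) / R.Distance; // ceil(Delay / Distance)
    RecMII = std::max(RecMII, CurMII);
  }
  return RecMII;
}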
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
  ...
  DepsAdded.push_back(std::make_pair(SU, *IP));
  ...
for (std::pair<SUnit *, SDep> &P : DepsAdded) {
  ...
  SUnit *TargetSU = D.getSUnit();
  unsigned Reg = D.getReg();
  unsigned Lat = D.getLatency();
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
    ...
  for (int i = 0, e = SUnits.size(); i != e; ++i) {
    ...
    for (auto &SI : SUnits[i].Succs) {
      ...
      int N = SI.getSUnit()->NodeNum;
      ...
      auto Dep = OutputDeps.find(BackEdge);
      if (Dep != OutputDeps.end()) {
        BackEdge = Dep->second;
        OutputDeps.erase(Dep);
      ...
      OutputDeps[N] = BackEdge;
      ...
      if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
          (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
        ...
      int N = SI.getSUnit()->NodeNum;
      if (!Added.test(N)) {
        AdjK[i].push_back(N);
        ...
    for (auto &PI : SUnits[i].Preds) {
      if (!SUnits[i].getInstr()->mayStore() ||
          ...
      if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
        int N = PI.getSUnit()->NodeNum;
        if (!Added.test(N)) {
          AdjK[i].push_back(N);
          ...
  for (auto &OD : OutputDeps)
    if (!Added.test(OD.second)) {
      AdjK[OD.first].push_back(OD.second);
      Added.set(OD.second);
bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
                                          ...
  SUnit *SV = &SUnits[V];
  ...
  for (auto W : AdjK[V]) {
    if (NumPaths > MaxPaths)
      ...
    } else if (!Blocked.test(W)) {
      if (circuit(W, S, NodeSets,
                  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
        ...
  for (auto W : AdjK[V]) {
    ...

void SwingSchedulerDAG::Circuits::unblock(int U) {
  ...
  while (!BU.empty()) {
    ...
    if (Blocked.test(W->NodeNum))
      unblock(W->NodeNum);
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
  ...
  Circuits Cir(SUnits, Topo);
  ...
  Cir.createAdjacencyStructure(this);
  ...
  Cir.circuit(i, i, NodeSets);
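findCircuits() enumerates the elementary circuits (recurrences) of the dependence graph, bounded by Circuits::MaxPaths to keep compile time in check. A hedged, simplified sketch of such a bounded enumeration over plain adjacency lists (it omits the blocked/unblocked bookkeeping the pass uses):

#include <functional>
#include <vector>

void findCyclesThrough(int S, const std::vector<std::vector<int>> &Adj,
                       std::vector<std::vector<int>> &Cycles, unsigned MaxPaths) {
  std::vector<int> Stack;
  std::vector<bool> OnStack(Adj.size(), false);
  std::function<void(int)> DFS = [&](int V) {
    if (Cycles.size() >= MaxPaths)
      return;                       // give up once the path budget is spent
    Stack.push_back(V);
    OnStack[V] = true;
    for (int W : Adj[V]) {
      if (W == S)
        Cycles.push_back(Stack);    // closed a cycle back to the start node
      else if (W > S && !OnStack[W])
        DFS(W);                     // only visit nodes after S to avoid duplicates
    }
    OnStack[V] = false;
    Stack.pop_back();
  };
  DFS(S);
}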
for (auto &Dep : SU.Preds) {
  SUnit *TmpSU = Dep.getSUnit();
  ...
  PHISUs.push_back(TmpSU);
  ...
  SrcSUs.push_back(TmpSU);
...
if (PHISUs.size() == 0 || SrcSUs.size() == 0)
  ...
for (auto &Dep : PHISUs[Index]->Succs) {
  ...
  SUnit *TmpSU = Dep.getSUnit();
  ...
  PHISUs.push_back(TmpSU);
  ...
  UseSUs.push_back(TmpSU);
...
if (UseSUs.size() == 0)
  ...
for (auto I : UseSUs) {
  for (auto Src : SrcSUs) {
    if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
      ...
      SDAG->Topo.AddPred(Src, I);
...
if (D.isArtificial())
void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
  ScheduleInfo.resize(SUnits.size());
  ...
  for (int I : Topo) {
    ...
  for (int I : Topo) {
    ...
    int zeroLatencyDepth = 0;
    ... EP = SU->Preds.end();
    ...
      if (IP->getLatency() == 0)
        ...
    ScheduleInfo[I].ASAP = asap;
    ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
  ...
    int zeroLatencyHeight = 0;
    ... ES = SU->Succs.end();
    ...
      SUnit *succ = IS->getSUnit();
      if (IS->getLatency() == 0)
        ...
    ScheduleInfo[*I].ALAP = alap;
    ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
  ...
  I.computeNodeSetInfo(this);
  ...
  for (unsigned i = 0; i < SUnits.size(); i++) {
    dbgs() << "\tNode " << i << ":\n";
for (const SDep &Pred : (*I)->Preds) {
  ...
for (const SDep &Succ : (*I)->Succs) {
  ...
return !Preds.empty();
...
for (SDep &Succ : (*I)->Succs) {
  ...
for (SDep &Pred : (*I)->Preds) {
  ...
return !Succs.empty();
...
if (!Visited.insert(Cur).second)
  ...
bool FoundPath = false;
...
  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
for (auto &PI : Cur->Preds)
  ...
    computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
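computeNodeFunctions() above fills in the per-node ASAP, ALAP, mobility, depth and height values that the node-set priorities and the ordering below rely on. A hedged sketch of the ASAP recurrence, ignoring the loop-carried distance adjustment the real pass applies: ASAP(v) is the longest latency-weighted path from any source to v, and ALAP is its mirror image computed backwards.

#include <algorithm>
#include <vector>

struct Edge { int From, To, Latency; }; // hypothetical edge record

std::vector<int> computeASAP(unsigned NumNodes, const std::vector<Edge> &Edges,
                             const std::vector<int> &TopoOrder) {
  std::vector<int> ASAP(NumNodes, 0);
  for (int V : TopoOrder)        // visit nodes in topological order
    for (const Edge &E : Edges)
      if (E.To == V)
        ASAP[V] = std::max(ASAP[V], ASAP[E.From] + E.Latency);
  return ASAP;                   // Mobility(v) = ALAP(v) - ASAP(v)
}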
for (SUnit *SU : NS) {
  ...
    if (MO.isReg() && MO.isUse()) {
      ...
      Uses.insert(*Units);
...
for (SUnit *SU : NS)
  ...
    if (MO.isReg() && MO.isDef() && !MO.isDead()) {
      ...
      if (!Uses.count(*Units))
        ...

void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
  for (auto &NS : NodeSets) {
    ...
    RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
    ...
    RecRPTracker.closeBottom();
    ...
    std::vector<SUnit *> SUnits(NS.begin(), NS.end());
    ...
      return A->NodeNum > B->NodeNum;
    ...
    for (auto &SU : SUnits) {
      ...
      RecRPTracker.setPos(std::next(CurInstI));
      ...
      RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
                                             ...
        dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
        ...
        NS.setExceedPressure(SU);
        ...
      RecRPTracker.recede();
void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
  unsigned Colocate = 0;
  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
    ...
    for (int j = i + 1; j < e; ++j) {
      ...

void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
  ...
  for (auto &NS : NodeSets) {
    if (NS.getRecMII() > 2)
      ...
    if (NS.getMaxDepth() > MII)
      ...
void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
  ...
  NodesAdded.insert(I.begin(), I.end());
  ...
  addConnectedNodes(I, NewSet, NodesAdded);
  if (!NewSet.empty())
    NodeSets.push_back(NewSet);
  ...
  addConnectedNodes(I, NewSet, NodesAdded);
  if (!NewSet.empty())
    NodeSets.push_back(NewSet);
  ...
  if (NodesAdded.count(&SU) == 0) {
    ...
    addConnectedNodes(&SU, NewSet, NodesAdded);
    if (!NewSet.empty())
      NodeSets.push_back(NewSet);

void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
                                          ...
  addConnectedNodes(Successor, NewSet, NodesAdded);
  for (auto &PI : SU->Preds) {
    SUnit *Predecessor = PI.getSUnit();
    if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
      addConnectedNodes(Predecessor, NewSet, NodesAdded);
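groupRemainingNodes() and addConnectedNodes() above sweep up every instruction that is not already part of a recurrence, growing a set from each unclaimed node through its predecessor and successor edges. A hedged sketch of that flood fill over a hypothetical node type:

#include <set>
#include <vector>

struct Node { std::vector<Node *> Succs, Preds; }; // hypothetical DAG node

void collectConnected(Node *N, std::set<Node *> &NewSet,
                      std::set<Node *> &NodesAdded) {
  if (!NodesAdded.insert(N).second)
    return;                          // already claimed by this or an earlier set
  NewSet.insert(N);
  for (Node *S : N->Succs)
    collectConnected(S, NewSet, NodesAdded);
  for (Node *P : N->Preds)
    collectConnected(P, NewSet, NodesAdded);
}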
for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
  ...
  if (Set2.count(SU) != 0)
    ...
return !Result.empty();

void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
  ...
  for (SUnit *SU : *J)
    ...

void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
  ...
  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
  ...
  for (auto &Nodes : NodeSets) {
    ...
      R.insert(N.begin(), N.end());
      ...
      R.insert(N.begin(), N.end());
      ...
    } else if (NodeSets.size() == 1) {
      for (auto &N : Nodes)
        if (N->Succs.size() == 0)
          ...
      SUnit *maxASAP = nullptr;
      for (SUnit *SU : Nodes) {
        ...
    while (!R.empty()) {
      if (Order == TopDown) {
        ...
        while (!R.empty()) {
          SUnit *maxHeight = nullptr;
          ...
          NodeOrder.insert(maxHeight);
          ...
          R.remove(maxHeight);
          for (const auto &I : maxHeight->Succs) {
            if (Nodes.count(I.getSUnit()) == 0)
              ...
            if (NodeOrder.contains(I.getSUnit()))
              ...
            R.insert(I.getSUnit());
          ...
          for (const auto &I : maxHeight->Preds) {
            ...
            if (Nodes.count(I.getSUnit()) == 0)
              ...
            if (NodeOrder.contains(I.getSUnit()))
              ...
            R.insert(I.getSUnit());
        ...
        if (pred_L(NodeOrder, N, &Nodes))
          R.insert(N.begin(), N.end());
      ...
        while (!R.empty()) {
          SUnit *maxDepth = nullptr;
          ...
          NodeOrder.insert(maxDepth);
          ...
          if (Nodes.isExceedSU(maxDepth)) {
            ...
            R.insert(Nodes.getNode(0));
          ...
          for (const auto &I : maxDepth->Preds) {
            if (Nodes.count(I.getSUnit()) == 0)
              ...
            if (NodeOrder.contains(I.getSUnit()))
              ...
            R.insert(I.getSUnit());
          ...
          for (const auto &I : maxDepth->Succs) {
            ...
            if (Nodes.count(I.getSUnit()) == 0)
              ...
            if (NodeOrder.contains(I.getSUnit()))
              ...
            R.insert(I.getSUnit());
        ...
        if (succ_L(NodeOrder, N, &Nodes))
          R.insert(N.begin(), N.end());
  ...
  dbgs() << "Node order: ";
  for (SUnit *I : NodeOrder)
    dbgs() << " " << I->NodeNum << " ";
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
  ...
  if (NodeOrder.empty()) {
    ...
  bool scheduleFound = false;
  ...
  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
    ...
      int EarlyStart = INT_MIN;
      int LateStart = INT_MAX;
      ...
      int SchedEnd = INT_MAX;
      int SchedStart = INT_MIN;
      Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
                            ...
        dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
                         LateStart, SchedEnd, SchedStart);
      ...
      if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
          SchedStart > LateStart)
        scheduleFound = false;
      else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
        SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
        scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
      } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
        SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
        scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
      } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
        ...
        scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
        ...
        scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
      ...
        scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
                                        FirstCycle + getASAP(SU) + II - 1, II);
      ...
        scheduleFound = false;
        ...
        dbgs() << "\tCan't schedule\n";
      ...
    } while (++NI != NE && scheduleFound);
  ...
  if (scheduleFound) {
    ...
    Pass.ORE->emit([&]() {
      ...
      << "Schedule found with Initiation Interval: "
      ...
      << ", MaxStageCount: "
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
  ...
  bool OffsetIsScalable;
  ...
  if (OffsetIsScalable)
    ...
  if (!BaseOp->isReg())
    ...
  if (BaseDef && BaseDef->isPHI()) {
... unsigned &OffsetPos,
...
unsigned BasePosLd, OffsetPosLd;
...
Register BaseReg = MI->getOperand(BasePosLd).getReg();
...
if (!Phi || !Phi->isPHI())
  ...
if (!PrevDef || PrevDef == MI)
  ...
unsigned BasePos1 = 0, OffsetPos1 = 0;
...
int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
...
BasePos = BasePosLd;
OffsetPos = OffsetPosLd;
... InstrChanges.find(SU);
if (It != InstrChanges.end()) {
  std::pair<unsigned, int64_t> RegAndOffset = It->second;
  unsigned BasePos, OffsetPos;
  ...
  Register BaseReg = MI->getOperand(BasePos).getReg();
  ...
  if (BaseStageNum < DefStageNum) {
    ...
    int OffsetDiff = DefStageNum - BaseStageNum;
    if (DefCycleNum < BaseCycleNum) {
      ...
    ... MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
while (Def->isPHI()) {
  ...
  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
    if (Def->getOperand(i + 1).getMBB() == BB) {
assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
...
unsigned DeltaS, DeltaD;
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
  ...
int64_t OffsetS, OffsetD;
bool OffsetSIsScalable, OffsetDIsScalable;
...
assert(!OffsetSIsScalable && !OffsetDIsScalable &&
       "Expected offsets to be byte offsets");
...
if (!Def || !Def->isPHI())
  ...
unsigned InitVal = 0;
unsigned LoopVal = 0;
...
uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
...
if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
  ...
return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
void SwingSchedulerDAG::postprocessDAG() {
  for (auto &M : Mutations)
bool forward = true;
...
  dbgs() << "Trying to insert node between " << StartCycle << " and "
         << EndCycle << " II: " << II << "\n";
...
if (StartCycle > EndCycle)
  ...
int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
for (int curCycle = StartCycle; curCycle != termCycle;
     forward ? ++curCycle : --curCycle) {
  ...
  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
       checkCycle <= LastCycle; checkCycle += II) {
    std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
    ...
    for (SUnit *CI : cycleInstrs) {
      if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
        ...
      ... "These instructions have already been scheduled.");
      ...
    dbgs() << "\tinsert at cycle " << curCycle << " ";
    ...
  ScheduledInstrs[curCycle].push_back(SU);
  InstrToCycle.insert(std::make_pair(SU, curCycle));
  if (curCycle > LastCycle)
    LastCycle = curCycle;
  if (curCycle < FirstCycle)
    FirstCycle = curCycle;
  ...
  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
Worklist.push_back(Dep);
int EarlyCycle = INT_MAX;
while (!Worklist.empty()) {
  ...
  if (Visited.count(PrevSU))
    ...
  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
  if (it == InstrToCycle.end())
    ...
  EarlyCycle = std::min(EarlyCycle, it->second);
  for (const auto &PI : PrevSU->Preds)
    ...
    Worklist.push_back(PI);
...
Worklist.push_back(Dep);
int LateCycle = INT_MIN;
while (!Worklist.empty()) {
  ...
  if (Visited.count(SuccSU))
    ...
  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
  if (it == InstrToCycle.end())
    ...
  for (const auto &SI : SuccSU->Succs)
    ...
    Worklist.push_back(SI);
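In SMSchedule::insert() above, a candidate cycle is checked against every already-filled cycle reached in steps of II: in a modulo schedule, cycles that are congruent modulo II execute on the same kernel slot and therefore share resources. A hedged sketch of that congruence-class check (the slot count and container shapes are assumptions, not the pass's types):

#include <map>
#include <vector>

bool slotAvailable(const std::map<int, std::vector<int>> &ScheduledInstrs,
                   int Cycle, int FirstCycle, int LastCycle, int II,
                   unsigned SlotsPerCycle) {
  unsigned Used = 0;
  for (int C = FirstCycle + ((Cycle - FirstCycle) % II); C <= LastCycle; C += II) {
    auto It = ScheduledInstrs.find(C);
    if (It != ScheduledInstrs.end())
      Used += It->second.size();   // everything in this congruence class issues together
  }
  return Used < SlotsPerCycle;
}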
for (auto &P : SU->Preds)
  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
    for (auto &S : P.getSUnit()->Succs)
      if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
        return P.getSUnit();
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
                              int *MinEnd, int *MaxStart, int II,
                              SwingSchedulerDAG *DAG) {
  ...
  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
    ...
    for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
      ...
      *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
      ...
      *MinLateStart = std::min(*MinLateStart, LateStart);
      ...
      *MinLateStart = std::min(*MinLateStart, cycle);
    ...
    for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
      if (SU->Succs[i].getSUnit() == I) {
        ...
        *MinLateStart = std::min(*MinLateStart, LateStart);
        ...
        *MaxStart = std::max(*MaxStart, Start);
        ...
        *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
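The bounds accumulated above follow the usual modulo-scheduling window rules: a scheduled predecessor raises the earliest start, a scheduled successor lowers the latest start, and an edge with loop-carried distance D is relaxed by D * II cycles. A hedged sketch of the two bound formulas (a simplification, not the pass's exact expressions):

int earlyStartFromPred(int PredCycle, int Latency, int Distance, int II) {
  return PredCycle + Latency - Distance * II; // predecessor pushes the window later
}

int lateStartFromSucc(int SuccCycle, int Latency, int Distance, int II) {
  return SuccCycle - Latency + Distance * II; // successor pulls the window earlier
}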
void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
                                 std::deque<SUnit *> &Insts) {
  ...
  bool OrderBeforeUse = false;
  bool OrderAfterDef = false;
  bool OrderBeforeDef = false;
  unsigned MoveDef = 0;
  unsigned MoveUse = 0;
  ...
  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
       ...
    for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
      ...
      unsigned BasePos, OffsetPos;
      if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
        if (MI->getOperand(BasePos).getReg() == Reg)
          ...
      std::tie(Reads, Writes) =
          (*I)->getInstr()->readsWritesVirtualRegister(Reg);
      ...
        OrderBeforeUse = true;
        ...
        OrderAfterDef = true;
        ...
        OrderBeforeUse = true;
        ...
        OrderAfterDef = true;
        ...
        OrderBeforeUse = true;
        ...
        OrderAfterDef = true;
        ...
        OrderBeforeUse = true;
        ...
        OrderBeforeDef = true;
        ...
    for (auto &S : SU->Succs) {
      if (S.getSUnit() != *I)
        ...
      OrderBeforeUse = true;
      ...
      OrderBeforeUse = true;
      if ((MoveUse == 0) || (Pos < MoveUse))
        ...
    for (auto &P : SU->Preds) {
      if (P.getSUnit() != *I)
        ...
      OrderAfterDef = true;
      ...
  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
    OrderBeforeUse = false;
  ...
  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
  ...
  if (OrderBeforeUse && OrderAfterDef) {
    SUnit *UseSU = Insts.at(MoveUse);
    SUnit *DefSU = Insts.at(MoveDef);
    if (MoveUse > MoveDef) {
      Insts.erase(Insts.begin() + MoveUse);
      Insts.erase(Insts.begin() + MoveDef);
    ...
      Insts.erase(Insts.begin() + MoveDef);
      Insts.erase(Insts.begin() + MoveUse);
  ...
    Insts.push_front(SU);
  ...
    Insts.push_back(SU);
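orderDependence() above rearranges the instructions that share a cycle so that, within the cycle, definitions occur before their uses. A hedged sketch of the invariant it maintains, phrased as a checker over a hypothetical instruction record:

#include <deque>
#include <set>
#include <vector>

struct Instr { std::vector<int> Defs, Uses; }; // hypothetical virtual register numbers

bool defsPrecedeUses(const std::deque<Instr> &Cycle) {
  std::set<int> DefinedInCycle, Seen;
  for (const Instr &I : Cycle)
    DefinedInCycle.insert(I.Defs.begin(), I.Defs.end());
  for (const Instr &I : Cycle) {
    for (int U : I.Uses)
      if (DefinedInCycle.count(U) && !Seen.count(U))
        return false;              // used before its in-cycle definition
    Seen.insert(I.Defs.begin(), I.Defs.end());
  }
  return true;
}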
unsigned InitVal = 0;
unsigned LoopVal = 0;
...
return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
...
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
  ...
  if (DMO.getReg() == LoopReg)
    ...
assert(StageDef != -1 && "Instruction should have been scheduled.");
...
if (SI.isAssignedRegDep())
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
  ...
  typedef std::pair<SUnit *, unsigned> UnitIndex;
  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));

  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
    Indices.push_back(std::make_pair(NodeOrder[i], i));

  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
    return std::get<0>(i1) < std::get<0>(i2);
  ...
  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
    SUnit *SU = NodeOrder[i];
    ...
    bool PredBefore = false;
    bool SuccBefore = false;
    ...
    unsigned PredIndex = std::get<1>(
        ...
    unsigned SuccIndex = std::get<1>(
        ...
        Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
    ...
      NumNodeOrderIssues++;
      ...
      << " are scheduled before node " << SU->NodeNum
      ...
    dbgs() << "Invalid node order found!\n";
unsigned OverlapReg = 0;
unsigned NewBaseReg = 0;
for (SUnit *SU : Instrs) {
  ...
  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
    ...
        InstrChanges.find(SU);
    if (It != InstrChanges.end()) {
      unsigned BasePos, OffsetPos;
      ...
          MI->getOperand(OffsetPos).getImm() - It->second.second;
      ...
    unsigned TiedUseIdx = 0;
    if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
      ...
      OverlapReg = MI->getOperand(TiedUseIdx).getReg();
      ...
      NewBaseReg = MI->getOperand(i).getReg();
std::deque<SUnit *> &cycleInstrs =
    ScheduledInstrs[cycle + (stage * InitiationInterval)];
for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
                                           E = cycleInstrs.rend();
     ...
  ScheduledInstrs[cycle].push_front(*I);
...
for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
  ScheduledInstrs.erase(cycle);
...
for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
  ...
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
std::deque<SUnit *> newOrderPhi;
for (SUnit *SU : cycleInstrs) {
  ...
    newOrderPhi.push_back(SU);
std::deque<SUnit *> newOrderI;
for (SUnit *SU : cycleInstrs) {
  ...
cycleInstrs.swap(newOrderPhi);
os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
   << " depth " << MaxDepth << " col " << Colocate << "\n";
for (const auto &I : Nodes)
  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
...
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
...
for (SUnit *CI : cycleInstrs->second) {
  ...
  os << "(" << CI->NodeNum << ") ";
unsigned ProcResourceID = 0;
...
  "Too many kinds of resources, unsupported");
...
Masks[I] = 1ULL << ProcResourceID;
...
Masks[I] = 1ULL << ProcResourceID;
for (unsigned U = 0; U < Desc.NumUnits; ++U)
  ...
  dbgs() << "ProcResourceDesc:\n";
  ...
    dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
                     ProcResource->Name, I, Masks[I],
                     ...
  dbgs() << " -----------------\n";
...
  dbgs() << "canReserveResources:\n";
...
  return DFAResources->canReserveResources(MID);
...
  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
...
for (; I != E; ++I) {
  ...
  unsigned NumUnits = ProcResource->NumUnits;
  ...
    dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
                     ProcResource->Name, I->ProcResourceIdx,
                     ProcResourceCount[I->ProcResourceIdx], NumUnits,
                     ...
  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
    ...
  dbgs() << "reserveResources:\n";
...
  return DFAResources->reserveResources(MID);
...
  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
...
++ProcResourceCount[PRE.ProcResourceIdx];
...
  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
                   ProcResource->Name, PRE.ProcResourceIdx,
                   ProcResourceCount[PRE.ProcResourceIdx],
                   ProcResource->NumUnits, PRE.Cycles);
...
  dbgs() << "reserveResources: done!\n\n";
...
  return DFAResources->clearResources();
std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
unsigned getPosition() const
MachineRegisterInfo & MRI
Virtual/real register map.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N fr...
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
MachineInstrBuilder & UseMI
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
void finishBlock() override
Clean up after the software pipeliner runs.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void print(raw_ostream &os) const
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void cleanup()
Performs final cleanup after expansion.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
LLVM_DUMP_METHOD void dump() const
Represents a schedule for a single-block loop.
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Represents a single loop in the control flow graph.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
size_type size() const
Determine the number of elements in the SetVector.
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
int compareRecMII(NodeSet &RHS)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
@ Anti
A register anti-dependence (aka WAR).
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
typename vector_type::const_iterator iterator
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
bool erase(const KeyT &Val)
void setImm(int64_t immVal)
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
Kind
These are the different kinds of scheduling dependencies.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Track the current register pressure at some position in the instruction stream, and remember the high...
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
A description of a memory reference used in the backend.
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
SmallVector< SDep, 4 > Succs
All sunit successors.
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
This class implements a map that also provides access to all stored values in a deterministic order.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
DiagnosticInfoOptimizationBase::Argument NV
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
LLVM_NODISCARD T pop_back_val()
A reimplementation of ModuloScheduleExpander.
SmallVector< MachineOperand, 4 > BrCond
void expand()
Performs the actual expansion.
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
unsigned const TargetRegisterInfo * TRI
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
SmallPtrSet< MachineInstr *, 2 > Uses
unsigned getNumProcResourceKinds() const
LLVM Basic Block Representation.
unsigned getNumOperands() const
Return number of MDNode operands.
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
void apply(Opt *O, const Mod &M, const Mods &... Ms)
iterator begin()
Get an iterator to the beginning of the SetVector.
void dump() const
Utility function used for debugging to print the schedule.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
const Value * getValue() const
Return the base address of the memory access.
unsigned short Latency
Node latency.
const MachineOperand & getOperand(unsigned i) const
unsigned NodeNum
Entry # of node in the node vector.
void setSubReg(unsigned subReg)
int getMOV(SUnit *Node)
The mobility function, which is the number of slots in which an instruction may be scheduled.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static constexpr LaneBitmask getNone()
Represent the analysis usage information of a pass.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
int getFirstCycle() const
Return the first cycle in the completed schedule.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
const HexagonInstrInfo * TII
Describe properties that are true of each instruction in the target description file.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineOperand class - Representation of each machine instruction operand.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
bool isValidSchedule(SwingSchedulerDAG *SSD)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
STATISTIC(NumFunctions, "Total number of functions")
This class implements an extremely fast bulk output stream that can only output to a stream.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
@ Output
A register output-dependence (aka WAW).
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it.
@ Data
Regular data dependence (aka true-dependence).
std::vector< int >::const_reverse_iterator const_reverse_iterator
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool empty() const
Determine if the SetVector is empty or not.
void setColocate(unsigned c)
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
The main class in the implementation of the target independent software pipeliner pass.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
@ Order
Any other ordering dependency.
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
uint64_t FuncUnits
Bitmask representing a set of functional units.
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
const MDOperand & getOperand(unsigned I) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
unsigned countPopulation(T Value)
Count the number of set bits in a value.
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Define a kind of processor resource that will be modeled by the scheduler.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
iterator find(const KeyT &Key)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
void annotate()
Performs the annotation.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int getInitiationInterval() const
Return the initiation interval for this schedule.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
use_instr_iterator use_instr_begin(Register RegNo) const
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
MachineOptimizationRemarkEmitter * ORE
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
initializer< Ty > init(const Ty &Val)
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
iterator find(const_arg_type_t< KeyT > Val)
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
MachineInstr * LoopCompare
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineInstr * LoopInductionVar
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
void setRecMII(unsigned mii)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
instr_iterator instr_end()
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
bool insert(const value_type &X)
Insert a new element into the SetVector.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
const TargetRegisterInfo * TRI
Target processor register info.
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
MachineBasicBlock * getMBB() const
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
RegisterClassInfo RegClassInfo
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
MachineFunction & MF
Machine function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
@ Barrier
An unknown scheduling barrier.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
const unsigned * SubUnitsIdxBegin
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
void dump() const override
const MachineBasicBlock * getParent() const
unsigned count(SUnit *SU) const
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
TargetSubtargetInfo - Generic base class for all target subtargets.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
unsigned getSubReg() const
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
LLVM_NODISCARD bool empty() const
These values represent a non-pipelined step in the execution of an instruction.
void clear()
Completely clear the SetVector.
Store the effects of a change in pressure on things that MI scheduler cares about.
bool isRegSequence() const
std::vector< SUnit > SUnits
The scheduling units.
void stable_sort(R &&Range)
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Function & getFunction()
Return the LLVM function that this machine code represents.
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
const TargetInstrInfo * TII
void setLatency(unsigned Lat)
Sets the latency for this edge.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
const TargetInstrInfo * TII
Target instruction information.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
BlockT * getHeader() const
void sort(IteratorTy Start, IteratorTy End)
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
void clearResources()
Reset the state.
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Machine model for scheduling, bundling, and heuristics.
bool hasPhysRegDefs
Has physreg defs that are being used.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
void print(raw_ostream &os) const
Print the schedule information to the given output.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
MachineBasicBlock * BB
The block in which to insert instructions.
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
static Type * getVoidTy(LLVMContext &C)
iterator end()
Get an iterator to the end of the SetVector.
Pass interface - Implemented by all 'passes'.
void dumpNode(const SUnit &SU) const override
std::set< NodeId > NodeSet
virtual void finishBlock()
Cleans up after scheduling in the given block.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
typename SuperClass::iterator iterator
LLVM_NODISCARD bool empty() const
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstrBuilder &DefMI
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
unsigned getNumOperands() const
Retuns the total number of operands.
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Align max(MaybeAlign Lhs, Align Rhs)
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
SUnit * getNode(unsigned i) const
A SetVector that performs no allocations if smaller than a certain size.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Scheduling unit. This is a node in the scheduling DAG.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
SmallVectorImpl< SDep >::iterator pred_iterator
AnalysisUsage & addRequired()
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
A ScheduleDAG for scheduling lists of MachineInstr.
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
A vector that has set insertion semantics.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
bool isEmpty() const
Returns true if there are no itineraries.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Generic base class for all target subtargets.
iterator_range< mop_iterator > operands()
LLVM Value Representation.
static use_instr_iterator use_instr_end()
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Itinerary data supplied by a subtarget to be used by a target.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
This class represents the scheduled code.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.