#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "pipeliner"

STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
STATISTIC(NumFailTooManyStores, "Pipeliner abort due to too many stores");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
                               cl::desc("Enable Software Pipelining"));

/// A command line argument to limit minimum initial interval for pipelining.
static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
                              cl::desc("Size limit for the MII."), cl::Hidden,
                              cl::init(27));

/// A command line argument to force pipeliner to use specified II.
static cl::opt<int> SwpForceII("pipeliner-force-ii",
                               cl::desc("Force pipeliner to use specified II."),
                               cl::Hidden, cl::init(-1));

/// A command line argument to limit the number of stages in the pipeline.
static cl::opt<int>
    SwpMaxStages("pipeliner-max-stages",
                 cl::desc("Maximum stages allowed in the generated schedule."),
                 cl::Hidden, cl::init(3));

static cl::opt<bool>
    SwpPruneDeps("pipeliner-prune-deps",
                 cl::desc("Prune dependences between unrelated Phi nodes."),
                 cl::Hidden, cl::init(true));

/// A command line option to disable the pruning of loop carried order
/// dependences.
static cl::opt<bool>
    SwpPruneLoopCarried("pipeliner-prune-loop-carried",
                        cl::desc("Prune loop carried order dependences."),
                        cl::Hidden, cl::init(true));

static cl::opt<bool> EmitTestAnnotations(
    "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
    cl::desc("Instead of emitting the pipelined code, annotate instructions "
             "with the generated schedule for feeding into the "
             "-modulo-schedule-test pass"));

static cl::opt<bool> ExperimentalCodeGen(
    "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
    cl::desc(
        "Use the experimental peeling code generator for software pipelining"));

static cl::opt<bool>
    LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false),
                     cl::desc("Limit register pressure of scheduled loop"));

static cl::opt<int>
    RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden,
                      cl::init(5),
                      cl::desc("Margin representing the unused percentage of "
                               "the register pressure limit"));

static cl::opt<bool>
    MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
               cl::desc("Use the MVE code generator for software pipelining"));

/// A command line argument to limit the number of store instructions in the
/// target basic block.
static cl::opt<unsigned> SwpMaxNumStores(
    "pipeliner-max-num-stores",
    cl::desc("Maximum number of stores allowed in the target loop."),
    cl::Hidden, cl::init(200));

static cl::opt<bool>
    SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
                       cl::init(true),
                       cl::desc("Enable CopyToPhi DAG Mutation"));

/// A command line argument to force pipeliner to use specified issue width.
static cl::opt<int> SwpForceIssueWidth(
    "pipeliner-force-issue-width",
    cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
    cl::init(-1));

/// A command line argument to set the window scheduling option.
static cl::opt<WindowSchedulingFlag> WindowSchedulingOption(
    "window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On),
    cl::desc("Set how to use window scheduling algorithm."),
    cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off",
                          "Turn off window algorithm."),
               clEnumValN(WindowSchedulingFlag::WS_On, "on",
                          "Use window algorithm after SMS algorithm fails."),
               clEnumValN(WindowSchedulingFlag::WS_Force, "force",
                          "Use window algorithm instead of SMS algorithm.")));
unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;

INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
                      "Modulo Software Pipelining", false, false)
  enum class InstrTag {

  TaggedSUnit(SUnit *SU, InstrTag Tag)

  InstrTag getTag() const { return InstrTag(getInt()); }

  struct LoadStoreChunk {
    void append(SUnit *SU);

  std::vector<SUnit> &SUnits;
  std::vector<BitVector> LoopCarried;
  std::vector<TaggedSUnit> TaggedSUnits;

    return LoopCarried[Idx];

  std::optional<InstrTag> getInstrTag(SUnit *SU) const;
  void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,
                                          const LoadStoreChunk &To);
  void computeDependenciesAux();
  TII = MF->getSubtarget().getInstrInfo();

  for (const auto &L : *MLI)

bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
  for (const auto &InnerLoop : L)
    Changed |= scheduleLoop(*InnerLoop);

  setPragmaPipelineOptions(L);
  if (!canPipelineLoop(L)) {
                 L.getStartLoc(), L.getHeader())
             << "Failed to pipeline loop";

    LI.LoopPipelinerInfo.reset();

  if (useSwingModuloScheduler())
    Changed = swingModuloScheduler(L);

  if (useWindowScheduler(Changed))
    Changed = runWindowScheduler(L);

  LI.LoopPipelinerInfo.reset();
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
  MachineBasicBlock *LBLK = L.getTopBlock();

  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
  if (LoopID == nullptr)

    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
             "Pipeline initiation interval hint metadata should have two operands.");

    } else if (S->getString() == "llvm.loop.pipeline.disable") {
  auto It = PhiDeps.find(Reg);
  if (It == PhiDeps.end())

  for (unsigned Dep : It->second) {

    unsigned DefReg = MI.getOperand(0).getReg();
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2)
      Ins->second.push_back(MI.getOperand(I).getReg());

  for (const auto &KV : PhiDeps) {
    unsigned Reg = KV.first;
bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
  if (L.getNumBlocks() != 1) {
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Not a single basic block: "
             << ore::NV("NumBlocks", L.getNumBlocks());

      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Disabled by Pragma.";

  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "The branch can't be understood";

  LI.LoopInductionVar = nullptr;
  LI.LoopCompare = nullptr;
  LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
  if (!LI.LoopPipelinerInfo) {
    LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "The loop structure is not supported";

  if (!L.getLoopPreheader()) {
    LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "No loop preheader found";

  unsigned NumStores = 0;
  for (MachineInstr &MI : *L.getHeader())

    NumFailTooManyStores++;
      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                               L.getStartLoc(), L.getHeader())
             << "Too many store instructions in the loop: "
             << ore::NV("NumStores", NumStores) << " > "

  preprocessPhiNodes(*L.getHeader());
  MachineRegisterInfo &MRI = MF->getRegInfo();

  for (MachineInstr &PI : B.phis()) {
    MachineOperand &DefOp = PI.getOperand(0);
    auto *RC = MRI.getRegClass(DefOp.getReg());

    for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
      MachineOperand &RegOp = PI.getOperand(i);
      MachineBasicBlock &PredB = *PI.getOperand(i + 1).getMBB();
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");

  SwingSchedulerDAG SMS(

  MachineBasicBlock *MBB = L.getHeader();

  return SMS.hasNewSchedule();

bool MachinePipeliner::runWindowScheduler(MachineLoop &L) {
  Context.RegClassInfo->runOnMachineFunction(*MF);

bool MachinePipeliner::useSwingModuloScheduler() {

bool MachinePipeliner::useWindowScheduler(bool Changed) {
                    "llvm.loop.pipeline.initiationinterval is set.\n");
void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
  else if (II_setByPragma > 0)
    MII = II_setByPragma;
    MII = std::max(ResMII, RecMII);

void SwingSchedulerDAG::setMAX_II() {
  else if (II_setByPragma > 0)
    MAX_II = II_setByPragma;
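// Note on the bounds combined above: unless an II is forced by
// -pipeliner-force-ii or a pragma, the minimum initiation interval is
// MII = max(ResMII, RecMII); e.g. with ResMII = 2 and RecMII = 3 the search
// for a schedule starts at II = 3, and setMAX_II() caps how far it may grow.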
  updatePhiDependences();
  Topo.InitDAGTopologicalSorting();

    dbgs() << "===== Loop Carried Edges Begin =====\n";
    dbgs() << "===== Loop Carried Edges End =====\n";

  NodeSetType NodeSets;
  findCircuits(NodeSets);
  NodeSetType Circuits = NodeSets;

  unsigned ResMII = calculateResMII();
  unsigned RecMII = calculateRecMII(NodeSets);

  setMII(ResMII, RecMII);
                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");

    Pass.ORE->emit([&]() {
          DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Invalid Minimal Initiation Interval: 0";

                      << ", we don't pipeline large loops\n");
    NumFailLargeMaxMII++;
    Pass.ORE->emit([&]() {
          DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Minimal Initiation Interval too large: "
             << ore::NV("MII", (int)MII) << " > "
             << "Refer to -pipeliner-max-mii.";

  computeNodeFunctions(NodeSets);
  registerPressureFilter(NodeSets);
  colocateNodeSets(NodeSets);
  checkNodeSets(NodeSets);

    for (auto &I : NodeSets) {
      dbgs() << " Rec NodeSet ";

  groupRemainingNodes(NodeSets);
  removeDuplicateNodes(NodeSets);

    for (auto &I : NodeSets) {
      dbgs() << " NodeSet ";

  computeNodeOrder(NodeSets);
  checkValidNodeOrder(Circuits);

  Scheduled = schedulePipeline(Schedule);

    Pass.ORE->emit([&]() {
          DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Unable to find schedule";

  if (numStages == 0) {
    Pass.ORE->emit([&]() {
          DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "No need to pipeline - no overlapped iterations in schedule.";

                      << " : too many stages, abort\n");
    NumFailLargeMaxStage++;
    Pass.ORE->emit([&]() {
          DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
             << "Too many stages in schedule: "
             << ore::NV("numStages", (int)numStages) << " > "
             << ". Refer to -pipeliner-max-stages.";

  Pass.ORE->emit([&]() {
             << "Pipelined successfully!";

  std::vector<MachineInstr *> OrderedInsts;
      OrderedInsts.push_back(SU->getInstr());
      Cycles[SU->getInstr()] = Cycle;

  for (auto &KV : NewMIs) {
    Cycles[KV.first] = Cycles[KV.second];
    Stages[KV.first] = Stages[KV.second];
    NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];

         "Cannot serialize a schedule with InstrChanges!");

      LoopPipelinerInfo->isMVEExpanderSupported() &&

  for (auto &KV : NewMIs)
    MF.deleteMachineInstr(KV.second);
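// In outline, SwingSchedulerDAG::schedule() above proceeds as: build Phi and
// loop-carried dependences, find recurrences (circuits), compute ResMII and
// RecMII, filter and order the node sets, then call schedulePipeline() and
// either emit the pipelined loop or bail out via one of the remarks above.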
  assert(Phi.isPHI() && "Expecting a Phi.");

  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() != Loop)
      InitVal = Phi.getOperand(i).getReg();
    else
      LoopVal = Phi.getOperand(i).getReg();

  assert(InitVal && LoopVal && "Unexpected Phi structure.");

  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    if (Phi.getOperand(i + 1).getMBB() == LoopBB)
      return Phi.getOperand(i).getReg();

  while (!Worklist.empty()) {
    for (const auto &SI : SU->Succs) {
      if (Visited.count(SuccSU))
  if (!getUnderlyingObjects())

bool SUnitWithMemInfo::getUnderlyingObjects() {
  if (!MI->hasOneMemOperand())

                          const SUnitWithMemInfo &Dst,
  if (Src.isTriviallyDisjoint(Dst))
  if (Src.isUnknown() || Dst.isUnknown())
  if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)

  for (const Value *SrcObj : Src.UnderlyingObjs)
    for (const Value *DstObj : Dst.UnderlyingObjs)

void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
  if (!MI->mayLoadOrStore())
  (MI->mayStore() ? Stores : Loads).emplace_back(SU);

    : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.size()),
      LoopCarried(N, BitVector(N)), TII(TII), TRI(TRI) {}
  for (auto &SU : SUnits) {
    auto Tagged = getInstrTag(&SU);
    TaggedSUnits.emplace_back(&SU, *Tagged);

  computeDependenciesAux();

std::optional<LoopCarriedOrderDepsTracker::InstrTag>
LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
  if (TII->isGlobalMemoryObject(MI))
    return InstrTag::Barrier;

  if (MI->mayStore() ||
      (MI->mayLoad() && !MI->isDereferenceableInvariantLoad()))
    return InstrTag::LoadOrStore;

  if (MI->mayRaiseFPException())
    return InstrTag::FPExceptions;

  return std::nullopt;

void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst) {
  if (Src.SU == Dst.SU)

  LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);

void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
    const LoadStoreChunk &From, const LoadStoreChunk &To) {
  for (const SUnitWithMemInfo &Src : From.Loads)
    for (const SUnitWithMemInfo &Dst : To.Stores)
      addDependenciesBetweenSUs(Src, Dst);

  for (const SUnitWithMemInfo &Src : From.Stores)
    for (const SUnitWithMemInfo &Dst : To.Loads)
      addDependenciesBetweenSUs(Src, Dst);

  for (const SUnitWithMemInfo &Src : From.Stores)
    for (const SUnitWithMemInfo &Dst : To.Stores)
      addDependenciesBetweenSUs(Src, Dst);

void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
  for (const auto &TSU : TaggedSUnits) {
    InstrTag Tag = TSU.getTag();
    SUnit *SU = TSU.getPointer();
    case InstrTag::Barrier:
      Chunks.emplace_back();
    case InstrTag::LoadOrStore:
      Chunks.back().append(SU);
    case InstrTag::FPExceptions:

  for (const LoadStoreChunk &Chunk : Chunks)
    addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
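// Sketch of the scheme implemented above: instructions are grouped into
// "chunks" delimited by barrier-tagged instructions, and within each chunk
// every load/store pair that may conflict across iterations sets a bit in
// LoopCarried; addLoopCarriedDependences() later turns those bits into
// loop-carried edges.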
LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
  LoopCarriedEdges LCE;

  LCODTracker.computeDependencies();
  for (unsigned I = 0; I != SUnits.size(); I++)
    for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())

void SwingSchedulerDAG::updatePhiDependences() {
  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();

  for (SUnit &I : SUnits) {
    MachineInstr *MI = I.getInstr();
    for (const MachineOperand &MO : MI->operands()) {

             UI = MRI.use_instr_begin(Reg),
             UE = MRI.use_instr_end();
          MachineInstr *UseMI = &*UI;
          SUnit *SU = getSUnit(UseMI);

      } else if (MO.isUse()) {
        if (DefMI == nullptr)

        SUnit *SU = getSUnit(DefMI);
          ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep,
          if (SU->NodeNum < I.NodeNum && !I.isPred(SU))

    for (auto &PI : I.Preds) {
      MachineInstr *PMI = PI.getSUnit()->getInstr();
        if (I.getInstr()->isPHI()) {

    for (const SDep &D : RemoveDeps)
void SwingSchedulerDAG::changeDependences() {
  for (SUnit &I : SUnits) {
    unsigned BasePos = 0, OffsetPos = 0;
    int64_t NewOffset = 0;
    if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,

    Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
    MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
    SUnit *DefSU = getSUnit(DefMI);

    MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
    SUnit *LastSU = getSUnit(LastMI);

    if (Topo.IsReachable(&I, LastSU))

    for (const SDep &P : I.Preds)
      if (P.getSUnit() == DefSU)
    for (const SDep &D : Deps) {
      Topo.RemovePred(&I, D.getSUnit());

    for (auto &P : LastSU->Preds)
    for (const SDep &D : Deps) {
      Topo.RemovePred(LastSU, D.getSUnit());

    Topo.AddPred(LastSU, &I);

    InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
                                   std::vector<MachineInstr *> &OrderedInsts,

       Stage <= LastStage; ++Stage) {
      Instrs[Cycle].push_front(SU);

    std::deque<SUnit *> &CycleInstrs = Instrs[Cycle];
    for (SUnit *SU : CycleInstrs) {
      OrderedInsts.push_back(MI);
struct FuncUnitSorter {
  const InstrItineraryData *InstrItins;
  const MCSubtargetInfo *STI;
  DenseMap<InstrStage::FuncUnits, unsigned> Resources;

  FuncUnitSorter(const TargetSubtargetInfo &TSI)
      : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}

  unsigned minFuncUnits(const MachineInstr *Inst,
    unsigned min = UINT_MAX;
    if (InstrItins && !InstrItins->isEmpty()) {
      for (const InstrStage &IS :
                             InstrItins->endStage(SchedClass))) {
        if (numAlternatives < min) {
          min = numAlternatives;

    const MCSchedClassDesc *SCDesc =
      for (const MCWriteProcResEntry &PRE :
        if (!PRE.ReleaseAtCycle)
        const MCProcResourceDesc *ProcResource =
        unsigned NumUnits = ProcResource->NumUnits;
        if (NumUnits < min) {
          F = PRE.ProcResourceIdx;

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  void calcCriticalResources(MachineInstr &MI) {
    unsigned SchedClass = MI.getDesc().getSchedClass();
    if (InstrItins && !InstrItins->isEmpty()) {
      for (const InstrStage &IS :
                             InstrItins->endStage(SchedClass))) {
        Resources[FuncUnits]++;

    const MCSchedClassDesc *SCDesc =
      for (const MCWriteProcResEntry &PRE :
        if (!PRE.ReleaseAtCycle)
        Resources[PRE.ProcResourceIdx]++;

    llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");

  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
    unsigned MFUs1 = minFuncUnits(IS1, F1);
    unsigned MFUs2 = minFuncUnits(IS2, F2);
    return MFUs1 > MFUs2;
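// The comparator above gives priority to instructions with the fewest
// alternative functional units (ties go to the more heavily contended unit),
// so the most constrained instructions reserve resources first when the
// resource-constrained MII is computed.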
1514class HighRegisterPressureDetector {
1515 MachineBasicBlock *OrigMBB;
1516 const MachineRegisterInfo &
MRI;
1517 const TargetRegisterInfo *
TRI;
1519 const unsigned PSetNum;
1525 std::vector<unsigned> InitSetPressure;
1529 std::vector<unsigned> PressureSetLimit;
1531 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1533 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1536 using OrderedInstsTy = std::vector<MachineInstr *>;
1537 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1540 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1541 if (Pressures.size() == 0) {
1545 for (
unsigned P : Pressures) {
1556 VirtRegOrUnit VRegOrUnit =
1558 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1559 for (
auto PSetIter =
MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
1561 dbgs() << *PSetIter <<
' ';
1566 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1569 VirtRegOrUnit VRegOrUnit =
1571 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1572 auto PSetIter =
MRI.getPressureSets(VRegOrUnit);
1573 unsigned Weight = PSetIter.getWeight();
1574 for (; PSetIter.isValid(); ++PSetIter)
1575 Pressure[*PSetIter] += Weight;
1578 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1580 auto PSetIter =
MRI.getPressureSets(VirtRegOrUnit(
Reg));
1581 unsigned Weight = PSetIter.getWeight();
1582 for (; PSetIter.isValid(); ++PSetIter) {
1583 auto &
P = Pressure[*PSetIter];
1585 "register pressure must be greater than or equal weight");
1607 void computeLiveIn() {
1608 DenseSet<Register>
Used;
1609 for (
auto &
MI : *OrigMBB) {
1610 if (
MI.isDebugInstr())
1612 for (
auto &Use : ROMap[&
MI].
Uses) {
1615 Use.VRegOrUnit.isVirtualReg()
1616 ?
Use.VRegOrUnit.asVirtualReg()
1617 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1622 if (isReservedRegister(
Reg))
1624 if (isDefinedInThisLoop(
Reg))
1630 for (
auto LiveIn : Used)
1631 increaseRegisterPressure(InitSetPressure, LiveIn);
1635 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1636 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1651 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1652 Instr2StageTy &Stages)
const {
1657 DenseSet<Register> TargetRegs;
1658 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1659 if (isDefinedInThisLoop(
Reg))
1662 for (MachineInstr *
MI : OrderedInsts) {
1665 UpdateTargetRegs(
Reg);
1667 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1670 ?
Use.VRegOrUnit.asVirtualReg()
1672 Use.VRegOrUnit.asMCRegUnit()));
1673 UpdateTargetRegs(
Reg);
1678 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1679 return Stages[
MI] +
MI->isPHI();
1682 DenseMap<Register, MachineInstr *> LastUseMI;
1684 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1687 Use.VRegOrUnit.isVirtualReg()
1688 ?
Use.VRegOrUnit.asVirtualReg()
1689 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1694 MachineInstr *Orig = Ite->second;
1695 MachineInstr *
New =
MI;
1696 if (InstrScore(Orig) < InstrScore(New))
1702 Instr2LastUsesTy LastUses;
1703 for (
auto [
Reg,
MI] : LastUseMI)
1704 LastUses[
MI].insert(
Reg);
1720 std::vector<unsigned>
1721 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1722 Instr2StageTy &Stages,
1723 const unsigned StageCount)
const {
1724 using RegSetTy = SmallDenseSet<Register, 16>;
1730 auto CurSetPressure = InitSetPressure;
1731 auto MaxSetPressure = InitSetPressure;
1732 auto LastUses = computeLastUses(OrderedInsts, Stages);
1735 dbgs() <<
"Ordered instructions:\n";
1736 for (MachineInstr *
MI : OrderedInsts) {
1737 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1742 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1743 VirtRegOrUnit VRegOrUnit) {
1757 increaseRegisterPressure(CurSetPressure,
Reg);
1761 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1767 if (!RegSet.contains(
Reg))
1772 decreaseRegisterPressure(CurSetPressure,
Reg);
1776 for (
unsigned I = 0;
I < StageCount;
I++) {
1777 for (MachineInstr *
MI : OrderedInsts) {
1778 const auto Stage = Stages[
MI];
1782 const unsigned Iter =
I - Stage;
1784 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1785 InsertReg(LiveRegSets[Iter],
Def.VRegOrUnit);
1787 for (
auto LastUse : LastUses[
MI]) {
1790 EraseReg(LiveRegSets[Iter - 1], LastUse);
1792 EraseReg(LiveRegSets[Iter], LastUse);
1796 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1797 MaxSetPressure[PSet] =
1798 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1801 dbgs() <<
"CurSetPressure=";
1802 dumpRegisterPressures(CurSetPressure);
1803 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1809 return MaxSetPressure;
1813 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1814 const MachineFunction &MF)
1815 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1816 TRI(MF.getSubtarget().getRegisterInfo()),
1817 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1818 PressureSetLimit(PSetNum, 0) {}
1822 void init(
const RegisterClassInfo &RCI) {
1823 for (MachineInstr &
MI : *OrigMBB) {
1824 if (
MI.isDebugInstr())
1826 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1830 computePressureSetLimit(RCI);
1835 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1836 const unsigned MaxStage)
const {
1838 "the percentage of the margin must be between 0 to 100");
1840 OrderedInstsTy OrderedInsts;
1841 Instr2StageTy Stages;
1843 const auto MaxSetPressure =
1844 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1847 dbgs() <<
"Dump MaxSetPressure:\n";
1848 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1849 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1854 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1855 unsigned Limit = PressureSetLimit[PSet];
1858 <<
" Margin=" << Margin <<
"\n");
1859 if (Limit < MaxSetPressure[PSet] + Margin) {
1862 <<
"Rejected the schedule because of too high register pressure\n");
unsigned SwingSchedulerDAG::calculateResMII() {
  ResourceManager RM(&MF.getSubtarget(), this);
  return RM.calculateResMII();

unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
  unsigned RecMII = 0;

  for (NodeSet &Nodes : NodeSets) {
    unsigned Delay = Nodes.getLatency();
    unsigned Distance = 1;

    unsigned CurMII = (Delay + Distance - 1) / Distance;
    Nodes.setRecMII(CurMII);
    if (CurMII > RecMII)
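// For each recurrence the bound computed above is ceil(Delay / Distance); with
// Distance fixed at 1 here, a recurrence whose accumulated latency is 4 forces
// CurMII = 4, and the final RecMII is the maximum over all recurrences.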
1911void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1912 SwingSchedulerDAG *DAG) {
1913 BitVector
Added(SUnits.size());
1914 DenseMap<int, int> OutputDeps;
1915 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1918 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1921 if (OE.isOutputDep()) {
1922 int N = OE.getDst()->NodeNum;
1924 auto Dep = OutputDeps.
find(BackEdge);
1925 if (Dep != OutputDeps.
end()) {
1926 BackEdge = Dep->second;
1927 OutputDeps.
erase(Dep);
1929 OutputDeps[
N] = BackEdge;
1932 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1944 int N = OE.getDst()->NodeNum;
1946 AdjK[i].push_back(
N);
1952 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1953 SUnit *Src =
IE.getSrc();
1954 SUnit *Dst =
IE.getDst();
1957 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1958 int N = Src->NodeNum;
1960 AdjK[i].push_back(
N);
1967 for (
auto &OD : OutputDeps)
1968 if (!
Added.test(OD.second)) {
1969 AdjK[OD.first].push_back(OD.second);
1970 Added.set(OD.second);
1976bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1977 const SwingSchedulerDAG *DAG,
1979 SUnit *SV = &SUnits[
V];
1984 for (
auto W : AdjK[V]) {
1985 if (NumPaths > MaxPaths)
1996 if (!Blocked.test(W)) {
1997 if (circuit(W, S, NodeSets, DAG,
1998 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
2006 for (
auto W : AdjK[V]) {
2017void SwingSchedulerDAG::Circuits::unblock(
int U) {
2019 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
2020 while (!BU.
empty()) {
2021 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
2022 assert(SI != BU.
end() &&
"Invalid B set.");
2025 if (Blocked.test(
W->NodeNum))
2026 unblock(
W->NodeNum);
2032void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
2033 Circuits Cir(SUnits, Topo);
2035 Cir.createAdjacencyStructure(
this);
2036 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
2038 Cir.circuit(
I,
I, NodeSets,
this);
2060void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
2061 for (SUnit &SU : DAG->
SUnits) {
2071 for (
auto &Dep : SU.
Preds) {
2072 SUnit *TmpSU = Dep.getSUnit();
2073 MachineInstr *TmpMI = TmpSU->
getInstr();
2084 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2092 for (
auto &Dep : PHISUs[Index]->Succs) {
2096 SUnit *TmpSU = Dep.getSUnit();
2097 MachineInstr *TmpMI = TmpSU->
getInstr();
2106 if (UseSUs.
size() == 0)
2111 for (
auto *
I : UseSUs) {
2112 for (
auto *Src : SrcSUs) {
2128void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2129 ScheduleInfo.resize(SUnits.size());
2132 for (
int I : Topo) {
2133 const SUnit &SU = SUnits[
I];
2140 for (
int I : Topo) {
2142 int zeroLatencyDepth = 0;
2143 SUnit *SU = &SUnits[
I];
2144 for (
const auto &IE : DDG->getInEdges(SU)) {
2145 SUnit *Pred =
IE.getSrc();
2146 if (
IE.getLatency() == 0)
2148 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2149 if (
IE.ignoreDependence(
true))
2151 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2152 IE.getDistance() * MII));
2154 maxASAP = std::max(maxASAP, asap);
2155 ScheduleInfo[
I].ASAP = asap;
2156 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2162 int zeroLatencyHeight = 0;
2163 SUnit *SU = &SUnits[
I];
2164 for (
const auto &OE : DDG->getOutEdges(SU)) {
2165 SUnit *Succ = OE.getDst();
2168 if (OE.getLatency() == 0)
2170 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2171 if (OE.ignoreDependence(
true))
2173 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2174 OE.getDistance() * MII));
2177 ScheduleInfo[
I].ALAP = alap;
2178 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2182 for (NodeSet &
I : NodeSets)
2183 I.computeNodeSetInfo(
this);
2186 for (
unsigned i = 0; i < SUnits.size(); i++) {
2187 dbgs() <<
"\tNode " << i <<
":\n";
2188 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2189 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2190 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2191 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2192 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2193 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2194 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2209 SUnit *PredSU = IE.getSrc();
2210 if (S && S->count(PredSU) == 0)
2212 if (IE.ignoreDependence(
true))
2223 SUnit *SuccSU = OE.getDst();
2224 if (!OE.isAntiDep())
2226 if (S && S->count(SuccSU) == 0)
2232 return !Preds.
empty();
2245 SUnit *SuccSU = OE.getDst();
2246 if (S && S->count(SuccSU) == 0)
2248 if (OE.ignoreDependence(
false))
2259 SUnit *PredSU = IE.getSrc();
2260 if (!IE.isAntiDep())
2262 if (S && S->count(PredSU) == 0)
2268 return !Succs.
empty();
2284 if (!Visited.
insert(Cur).second)
2285 return Path.contains(Cur);
2286 bool FoundPath =
false;
2288 if (!OE.ignoreDependence(
false))
2290 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2292 if (IE.isAntiDep() && IE.getDistance() == 0)
2294 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2309 for (
SUnit *SU : NS) {
2315 if (
Reg.isVirtual())
2317 else if (
MRI.isAllocatable(
Reg))
2318 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2322 for (
SUnit *SU : NS)
2326 if (
Reg.isVirtual()) {
2330 }
else if (
MRI.isAllocatable(
Reg)) {
2331 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2342void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2343 for (
auto &NS : NodeSets) {
2347 IntervalPressure RecRegPressure;
2348 RegPressureTracker RecRPTracker(RecRegPressure);
2349 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2351 RecRPTracker.closeBottom();
2353 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2354 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2355 return A->NodeNum >
B->NodeNum;
2358 for (
auto &SU : SUnits) {
2364 RecRPTracker.setPos(std::next(CurInstI));
2366 RegPressureDelta RPDelta;
2368 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2373 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2376 NS.setExceedPressure(SU);
2379 RecRPTracker.recede();
2386void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2387 unsigned Colocate = 0;
2388 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2390 SmallSetVector<SUnit *, 8>
S1;
2393 for (
int j = i + 1;
j <
e; ++
j) {
2397 SmallSetVector<SUnit *, 8> S2;
2414void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2419 for (
auto &NS : NodeSets) {
2420 if (NS.getRecMII() > 2)
2422 if (NS.getMaxDepth() > MII)
2431void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2432 SetVector<SUnit *> NodesAdded;
2433 SmallPtrSet<SUnit *, 8> Visited;
2436 for (NodeSet &
I : NodeSets) {
2437 SmallSetVector<SUnit *, 8>
N;
2440 SetVector<SUnit *>
Path;
2441 for (SUnit *NI :
N) {
2443 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2450 if (
succ_L(NodesAdded,
N, DDG.get())) {
2451 SetVector<SUnit *>
Path;
2452 for (SUnit *NI :
N) {
2454 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2465 SmallSetVector<SUnit *, 8>
N;
2466 if (
succ_L(NodesAdded,
N, DDG.get()))
2468 addConnectedNodes(
I, NewSet, NodesAdded);
2469 if (!NewSet.
empty())
2470 NodeSets.push_back(NewSet);
2475 if (
pred_L(NodesAdded,
N, DDG.get()))
2477 addConnectedNodes(
I, NewSet, NodesAdded);
2478 if (!NewSet.
empty())
2479 NodeSets.push_back(NewSet);
2483 for (SUnit &SU : SUnits) {
2484 if (NodesAdded.
count(&SU) == 0) {
2486 addConnectedNodes(&SU, NewSet, NodesAdded);
2487 if (!NewSet.
empty())
2488 NodeSets.push_back(NewSet);
2494void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2495 SetVector<SUnit *> &NodesAdded) {
2498 for (
auto &OE : DDG->getOutEdges(SU)) {
2500 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2502 addConnectedNodes(
Successor, NewSet, NodesAdded);
2504 for (
auto &IE : DDG->getInEdges(SU)) {
2505 SUnit *Predecessor =
IE.getSrc();
2506 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2507 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2516 for (
SUnit *SU : Set1) {
2517 if (Set2.
count(SU) != 0)
2520 return !Result.empty();
2524void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2525 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2528 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2533 for (SUnit *SU : *J)
2545void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2546 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2548 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2549 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2564void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2565 SmallSetVector<SUnit *, 8>
R;
2568 for (
auto &Nodes : NodeSets) {
2571 SmallSetVector<SUnit *, 8>
N;
2586 }
else if (NodeSets.size() == 1) {
2587 for (
const auto &
N : Nodes)
2588 if (
N->Succs.size() == 0)
2594 SUnit *maxASAP =
nullptr;
2595 for (SUnit *SU : Nodes) {
2596 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2597 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2605 while (!
R.empty()) {
2606 if (Order == TopDown) {
2610 while (!
R.empty()) {
2611 SUnit *maxHeight =
nullptr;
2612 for (SUnit *
I : R) {
2613 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2615 else if (getHeight(
I) == getHeight(maxHeight) &&
2616 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2618 else if (getHeight(
I) == getHeight(maxHeight) &&
2619 getZeroLatencyHeight(
I) ==
2620 getZeroLatencyHeight(maxHeight) &&
2621 getMOV(
I) < getMOV(maxHeight))
2626 R.remove(maxHeight);
2627 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2628 SUnit *SU = OE.getDst();
2629 if (Nodes.count(SU) == 0)
2633 if (OE.ignoreDependence(
false))
2642 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2643 SUnit *SU =
IE.getSrc();
2644 if (!
IE.isAntiDep())
2646 if (Nodes.count(SU) == 0)
2655 SmallSetVector<SUnit *, 8>
N;
2662 while (!
R.empty()) {
2663 SUnit *maxDepth =
nullptr;
2664 for (SUnit *
I : R) {
2665 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2667 else if (getDepth(
I) == getDepth(maxDepth) &&
2668 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2670 else if (getDepth(
I) == getDepth(maxDepth) &&
2671 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2672 getMOV(
I) < getMOV(maxDepth))
2678 if (Nodes.isExceedSU(maxDepth)) {
2681 R.insert(Nodes.getNode(0));
2684 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2685 SUnit *SU =
IE.getSrc();
2686 if (Nodes.count(SU) == 0)
2697 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2698 SUnit *SU = OE.getDst();
2699 if (!OE.isAntiDep())
2701 if (Nodes.count(SU) == 0)
2710 SmallSetVector<SUnit *, 8>
N;
2719 dbgs() <<
"Node order: ";
2721 dbgs() <<
" " <<
I->NodeNum <<
" ";
2728bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2735 bool scheduleFound =
false;
2736 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2739 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2740 HRPDetector->init(RegClassInfo);
2743 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2755 int EarlyStart = INT_MIN;
2756 int LateStart = INT_MAX;
2765 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2767 if (EarlyStart > LateStart)
2768 scheduleFound =
false;
2769 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2771 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2772 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2774 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2775 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2776 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2785 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2787 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2790 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2791 FirstCycle + getASAP(SU) +
II - 1,
II);
2799 scheduleFound =
false;
2803 dbgs() <<
"\tCan't schedule\n";
2805 }
while (++NI != NE && scheduleFound);
2810 scheduleFound = DDG->isValidSchedule(Schedule);
2832 if (scheduleFound) {
2833 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2838 if (scheduleFound) {
2840 Pass.ORE->emit([&]() {
2841 return MachineOptimizationRemarkAnalysis(
2842 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2843 <<
"Schedule found with Initiation Interval: "
2845 <<
", MaxStageCount: "
2859 if (!
Reg.isVirtual())
2861 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2873 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2901 if (Def->getParent() != LoopBB)
2904 if (Def->isCopy()) {
2906 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2908 CurReg = Def->getOperand(1).getReg();
2909 }
else if (Def->isPHI()) {
2915 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2923 bool OffsetIsScalable;
2924 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2927 CurReg = BaseOp->
getReg();
2939 if (CurReg == OrgReg)
2951bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2952 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2953 const MachineOperand *BaseOp;
2955 bool OffsetIsScalable;
2956 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
2960 if (OffsetIsScalable)
2963 if (!BaseOp->
isReg())
2976bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
2978 unsigned &OffsetPos,
2984 unsigned BasePosLd, OffsetPosLd;
2990 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
2991 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
2992 if (!Phi || !
Phi->isPHI())
3000 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
3001 if (!PrevDef || PrevDef ==
MI)
3007 unsigned BasePos1 = 0, OffsetPos1 = 0;
3013 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
3015 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
3018 MF.deleteMachineInstr(NewMI);
3023 BasePos = BasePosLd;
3024 OffsetPos = OffsetPosLd;
3036 InstrChanges.find(SU);
3037 if (It != InstrChanges.
end()) {
3038 std::pair<Register, int64_t> RegAndOffset = It->second;
3039 unsigned BasePos, OffsetPos;
3040 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3042 Register BaseReg =
MI->getOperand(BasePos).getReg();
3048 if (BaseStageNum < DefStageNum) {
3050 int OffsetDiff = DefStageNum - BaseStageNum;
3051 if (DefCycleNum < BaseCycleNum) {
3057 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3072 while (Def->isPHI()) {
3073 if (!Visited.
insert(Def).second)
3075 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3076 if (Def->getOperand(i + 1).getMBB() == BB) {
3077 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3088 int DeltaB, DeltaO, Delta;
3095 int64_t OffsetB, OffsetO;
3096 bool OffsetBIsScalable, OffsetOIsScalable;
3098 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3099 OffsetBIsScalable,
TRI) ||
3100 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3101 OffsetOIsScalable,
TRI))
3104 if (OffsetBIsScalable || OffsetOIsScalable)
3114 if (!RegB.
isVirtual() || !RegO.isVirtual())
3119 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3144 dbgs() <<
"Overlap check:\n";
3145 dbgs() <<
" BaseMI: ";
3147 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3148 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3149 dbgs() <<
" OtherMI: ";
3151 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3152 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3160 int64_t BaseMinAddr = OffsetB;
3161 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3162 if (BaseMinAddr > OhterNextIterMaxAddr) {
3167 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3168 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3169 if (BaseMaxAddr < OtherNextIterMinAddr) {
3183 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3184 Edge.getDst()->isBoundaryNode())
3190 if (Edge.isOutputDep())
3195 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3212void SwingSchedulerDAG::postProcessDAG() {
3213 for (
auto &M : Mutations)
3223 bool forward =
true;
3225 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3226 << EndCycle <<
" II: " <<
II <<
"\n";
3228 if (StartCycle > EndCycle)
3232 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3233 for (
int curCycle = StartCycle; curCycle != termCycle;
3234 forward ? ++curCycle : --curCycle) {
3237 ProcItinResources.canReserveResources(*SU, curCycle)) {
3239 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3244 ProcItinResources.reserveResources(*SU, curCycle);
3245 ScheduledInstrs[curCycle].push_back(SU);
3246 InstrToCycle.insert(std::make_pair(SU, curCycle));
3247 if (curCycle > LastCycle)
3248 LastCycle = curCycle;
3249 if (curCycle < FirstCycle)
3250 FirstCycle = curCycle;
3254 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3267 int EarlyCycle = INT_MAX;
3268 while (!Worklist.
empty()) {
3271 if (Visited.
count(PrevSU))
3273 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3274 if (it == InstrToCycle.end())
3276 EarlyCycle = std::min(EarlyCycle, it->second);
3277 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3278 if (IE.isOrderDep() || IE.isOutputDep())
3291 int LateCycle = INT_MIN;
3292 while (!Worklist.
empty()) {
3297 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3298 if (it == InstrToCycle.end())
3300 LateCycle = std::max(LateCycle, it->second);
3302 if (OE.isOrderDep() || OE.isOutputDep())
3313 for (
auto &
P : SU->
Preds)
3314 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3315 for (
auto &S :
P.getSUnit()->Succs)
3316 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3317 return P.getSUnit();
3330 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3333 if (IE.getSrc() ==
I) {
3338 *MinLateStart = std::min(*MinLateStart, End);
3340 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3341 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3346 if (OE.getDst() ==
I) {
3351 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3353 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3354 *MinLateStart = std::min(*MinLateStart, LateStart);
3359 for (
const auto &Dep : SU->
Preds) {
3362 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3364 *MinLateStart = std::min(*MinLateStart, cycle);
3374 std::deque<SUnit *> &Insts)
const {
3376 bool OrderBeforeUse =
false;
3377 bool OrderAfterDef =
false;
3378 bool OrderBeforeDef =
false;
3379 unsigned MoveDef = 0;
3380 unsigned MoveUse = 0;
3385 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3388 if (!MO.isReg() || !MO.getReg().isVirtual())
3392 unsigned BasePos, OffsetPos;
3393 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3394 if (
MI->getOperand(BasePos).getReg() == Reg)
3398 std::tie(Reads, Writes) =
3399 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3401 OrderBeforeUse =
true;
3406 OrderAfterDef =
true;
3408 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3410 OrderBeforeUse =
true;
3414 OrderAfterDef =
true;
3418 OrderBeforeUse =
true;
3422 OrderAfterDef =
true;
3427 OrderBeforeUse =
true;
3433 OrderBeforeDef =
true;
3441 if (OE.getDst() != *
I)
3444 OrderBeforeUse =
true;
3451 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3453 OrderBeforeUse =
true;
3454 if ((MoveUse == 0) || (Pos < MoveUse))
3459 if (IE.getSrc() != *
I)
3461 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3463 OrderAfterDef =
true;
3470 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3471 OrderBeforeUse =
false;
3476 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3480 if (OrderBeforeUse && OrderAfterDef) {
3481 SUnit *UseSU = Insts.at(MoveUse);
3482 SUnit *DefSU = Insts.at(MoveDef);
3483 if (MoveUse > MoveDef) {
3484 Insts.erase(Insts.begin() + MoveUse);
3485 Insts.erase(Insts.begin() + MoveDef);
3487 Insts.erase(Insts.begin() + MoveDef);
3488 Insts.erase(Insts.begin() + MoveUse);
3498 Insts.push_front(SU);
3500 Insts.push_back(SU);
3508 assert(Phi.isPHI() &&
"Expecting a Phi.");
3515 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3523 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3541 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3547 if (DMO.getReg() == LoopReg)
3558 if (InstrToCycle.count(IE.getSrc()))
3569 for (
auto &SU : SSD->
SUnits)
3574 while (!Worklist.
empty()) {
3576 if (DoNotPipeline.
count(SU))
3579 DoNotPipeline.
insert(SU);
3586 if (OE.getDistance() == 1)
3589 return DoNotPipeline;
3598 int NewLastCycle = INT_MIN;
3603 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3610 if (IE.getDistance() == 0)
3611 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3616 if (OE.getDistance() == 1)
3617 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3619 int OldCycle = InstrToCycle[&SU];
3620 if (OldCycle != NewCycle) {
3621 InstrToCycle[&SU] = NewCycle;
3626 <<
") is not pipelined; moving from cycle " << OldCycle
3627 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3652 if (FirstCycle + InitiationInterval <= NewCycle)
3655 NewLastCycle = std::max(NewLastCycle, NewCycle);
3657 LastCycle = NewLastCycle;
3674 int CycleDef = InstrToCycle[&SU];
3675 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3677 SUnit *Dst = OE.getDst();
3678 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3679 if (OE.getReg().isPhysical()) {
3682 if (InstrToCycle[Dst] <= CycleDef)
3700void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3703 typedef std::pair<SUnit *, unsigned> UnitIndex;
3704 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3706 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3707 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3709 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3710 return std::get<0>(i1) < std::get<0>(i2);
3723 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3727 bool PredBefore =
false;
3728 bool SuccBefore =
false;
3735 for (
const auto &IE : DDG->getInEdges(SU)) {
3736 SUnit *PredSU = IE.getSrc();
3737 unsigned PredIndex = std::get<1>(
3746 for (
const auto &OE : DDG->getOutEdges(SU)) {
3747 SUnit *SuccSU = OE.getDst();
3753 unsigned SuccIndex = std::get<1>(
3766 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3771 NumNodeOrderIssues++;
3775 <<
" are scheduled before node " << SU->
NodeNum
3782 dbgs() <<
"Invalid node order found!\n";
3795 for (
SUnit *SU : Instrs) {
3797 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3805 InstrChanges.find(SU);
3806 if (It != InstrChanges.
end()) {
3807 unsigned BasePos, OffsetPos;
3809 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3813 MI->getOperand(OffsetPos).getImm() - It->second.second;
3826 unsigned TiedUseIdx = 0;
3827 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3829 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3831 NewBaseReg =
MI->getOperand(i).getReg();
3840 const std::deque<SUnit *> &Instrs)
const {
3841 std::deque<SUnit *> NewOrderPhi;
3842 for (
SUnit *SU : Instrs) {
3844 NewOrderPhi.push_back(SU);
3846 std::deque<SUnit *> NewOrderI;
3847 for (
SUnit *SU : Instrs) {
3863 std::deque<SUnit *> &cycleInstrs =
3864 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3866 ScheduledInstrs[cycle].push_front(SU);
3872 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3873 ScheduledInstrs.erase(cycle);
3883 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3892 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3893 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3894 for (
const auto &
I : Nodes)
3895 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3906 for (
SUnit *CI : cycleInstrs->second) {
3908 os <<
"(" << CI->
NodeNum <<
") ";
3919void ResourceManager::dumpMRT()
const {
3923 std::stringstream SS;
3925 SS << std::setw(4) <<
"Slot";
3926 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3927 SS << std::setw(3) <<
I;
3928 SS << std::setw(7) <<
"#Mops"
3930 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3931 SS << std::setw(4) << Slot;
3932 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3933 SS << std::setw(3) << MRT[Slot][
I];
3934 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3943 unsigned ProcResourceID = 0;
3947 assert(SM.getNumProcResourceKinds() < 64 &&
3948 "Too many kinds of resources, unsupported");
3951 Masks.
resize(SM.getNumProcResourceKinds());
3952 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3954 if (
Desc.SubUnitsIdxBegin)
3956 Masks[
I] = 1ULL << ProcResourceID;
3960 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3962 if (!
Desc.SubUnitsIdxBegin)
3964 Masks[
I] = 1ULL << ProcResourceID;
3965 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3966 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3971 dbgs() <<
"ProcResourceDesc:\n";
3972 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3974 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3975 ProcResource->
Name,
I, Masks[
I],
3978 dbgs() <<
" -----------------\n";
3986 dbgs() <<
"canReserveResources:\n";
3989 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
3995 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4001 reserveResources(SCDesc,
Cycle);
4002 bool Result = !isOverbooked();
4003 unreserveResources(SCDesc,
Cycle);
4012 dbgs() <<
"reserveResources:\n";
4015 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4021 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4027 reserveResources(SCDesc,
Cycle);
4032 dbgs() <<
"reserveResources: done!\n\n";
4043 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4046 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4055 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4058 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4061bool ResourceManager::isOverbooked()
const {
4063 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
4064 for (
unsigned I = 1,
E = SM.getNumProcResourceKinds();
I <
E; ++
I) {
4065 const MCProcResourceDesc *
Desc = SM.getProcResource(
I);
4066 if (MRT[Slot][
I] >
Desc->NumUnits)
4069 if (NumScheduledMops[Slot] > IssueWidth)
4075int ResourceManager::calculateResMIIDFA()
const {
4080 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4081 for (SUnit &SU : DAG->
SUnits)
4082 FUS.calcCriticalResources(*SU.
getInstr());
4083 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4086 for (SUnit &SU : DAG->
SUnits)
4093 while (!FuncUnitOrder.empty()) {
4094 MachineInstr *
MI = FuncUnitOrder.top();
4095 FuncUnitOrder.pop();
4096 if (
TII->isZeroCost(
MI->getOpcode()))
4102 unsigned ReservedCycles = 0;
4103 auto *RI = Resources.
begin();
4104 auto *RE = Resources.
end();
4106 dbgs() <<
"Trying to reserve resource for " << NumCycles
4107 <<
" cycles for \n";
4110 for (
unsigned C = 0;
C < NumCycles; ++
C)
4112 if ((*RI)->canReserveResources(*
MI)) {
4113 (*RI)->reserveResources(*
MI);
4120 <<
", NumCycles:" << NumCycles <<
"\n");
4122 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4124 <<
"NewResource created to reserve resources"
4127 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4128 NewResource->reserveResources(*
MI);
4129 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4133 int Resmii = Resources.
size();
4140 return calculateResMIIDFA();
4147 for (
SUnit &SU : DAG->SUnits) {
4159 <<
" WriteProcRes: ";
4164 make_range(STI->getWriteProcResBegin(SCDesc),
4165 STI->getWriteProcResEnd(SCDesc))) {
4169 SM.getProcResource(PRE.ProcResourceIdx);
4170 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4173 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4178 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4181 dbgs() <<
"#Mops: " << NumMops <<
", "
4182 <<
"IssueWidth: " << IssueWidth <<
", "
4183 <<
"Cycles: " << Result <<
"\n";
4188 std::stringstream SS;
4189 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4190 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4195 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4197 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4200 std::stringstream SS;
4201 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4202 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4203 << std::setw(10) << Cycles <<
"\n";
4207 if (Cycles > Result)
4214 InitiationInterval =
II;
4215 DFAResources.clear();
4216 DFAResources.resize(
II);
4217 for (
auto &
I : DFAResources)
4218 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4221 NumScheduledMops.clear();
4222 NumScheduledMops.resize(
II);
4226 if (Pred.isArtificial() || Dst->isBoundaryNode())
4231 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4234SwingSchedulerDDG::SwingSchedulerDDGEdges &
4235SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4237 return EntrySUEdges;
4243const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4244SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4246 return EntrySUEdges;
4252void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4253 const SwingSchedulerDDGEdge &
Edge) {
4255 "Validation-only edges are not expected here.");
4256 auto &Edges = getEdges(SU);
4257 if (
Edge.getSrc() == SU)
4258 Edges.Succs.push_back(
Edge);
4260 Edges.Preds.push_back(
Edge);
4263void SwingSchedulerDDG::initEdges(SUnit *SU) {
4264 for (
const auto &PI : SU->
Preds) {
4265 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4270 for (
const auto &SI : SU->
Succs) {
4271 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4279 : EntrySU(EntrySU), ExitSU(ExitSU) {
4280 EdgesVec.resize(SUnits.size());
4285 for (
auto &SU : SUnits)
4289 for (
SUnit &SU : SUnits) {
4294 for (
SUnit *Dst : *OD) {
4297 Edge.setDistance(1);
4298 ValidationOnlyEdges.push_back(Edge);
4304const SwingSchedulerDDG::EdgesType &
4306 return getEdges(SU).Preds;
4309const SwingSchedulerDDG::EdgesType &
4311 return getEdges(SU).Succs;
4318 auto ExpandCycle = [&](
SUnit *SU) {
4325 SUnit *Src = Edge.getSrc();
4326 SUnit *Dst = Edge.getDst();
4327 if (!Src->isInstr() || !Dst->isInstr())
4329 int CycleSrc = ExpandCycle(Src);
4330 int CycleDst = ExpandCycle(Dst);
4331 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4332 if (CycleSrc > MaxLateStart) {
4334 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4335 << Dst->NodeNum <<
"\n";
4345 for (
SUnit &SU : SUnits) {
4374 !
TII->isGlobalMemoryObject(FromMI) &&
4392 const auto DumpSU = [](
const SUnit *SU) {
4393 std::ostringstream OSS;
4394 OSS <<
"SU(" << SU->
NodeNum <<
")";
4398 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4400 for (
SUnit *Dst : *Order)
4401 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)
Depth-first search to detect cycles among PHI dependencies.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains any of the elements in Set2; the common elements are collected in Result.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allowed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit the minimum initiation interval (MII) for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
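A minimal sketch of how these enumerators might be consulted when choosing between the SMS and window schedulers; this is not the pass's actual useWindowScheduler logic, and the helper name is hypothetical:

  // Hypothetical helper; mirrors the enumerator meanings listed above.
  static bool preferWindowScheduler(bool SMSFailed) {
    switch (WindowSchedulingOption) {
    case WindowSchedulingFlag::WS_Off:
      return false;                 // never use window scheduling
    case WindowSchedulingFlag::WS_On:
      return SMSFailed;             // only after SMS fails
    case WindowSchedulingFlag::WS_Force:
      return true;                  // use window scheduling instead of SMS
    }
    llvm_unreachable("unknown window scheduling flag");
  }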
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
This file provides utility analysis objects describing memory locations.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
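A minimal, self-contained sketch of the DenseMap operations listed above; the keys and values are illustrative and unrelated to the pipeliner:

  #include "llvm/ADT/DenseMap.h"

  static int denseMapDemo() {
    llvm::DenseMap<unsigned, int> M;
    M.try_emplace(1, 10);                  // inserts only if key 1 is absent
    M.insert({2, 20});                     // returns {iterator, bool}
    int V = M.lookup(3);                   // missing key -> default value (0)
    if (auto It = M.find(1); It != M.end())
      V += It->second;
    M.erase(2);                            // returns true if a key was removed
    return V;
  }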
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
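A hedged sketch of walking the write process-resource entries for one scheduling class, assuming a subtarget with an instruction-level scheduling model; it mirrors the accessors listed above rather than the pipeliner's actual resource-tracking code:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/TargetSubtargetInfo.h"
  #include "llvm/MC/MCSchedule.h"

  static unsigned countProcResUses(const llvm::TargetSubtargetInfo &STI,
                                   unsigned SchedClass) {
    const llvm::MCSchedModel &SM = STI.getSchedModel();
    if (!SM.hasInstrSchedModel())
      return 0;
    const llvm::MCSchedClassDesc *SC = SM.getSchedClassDesc(SchedClass);
    unsigned N = 0;
    for (const llvm::MCWriteProcResEntry &PRE :
         llvm::make_range(STI.getWriteProcResBegin(SC),
                          STI.getWriteProcResEnd(SC))) {
      (void)SM.getProcResource(PRE.ProcResourceIdx); // resource kind consumed
      ++N;
    }
    return N;
  }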
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
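A small sketch of the Register queries listed above; Reg is a placeholder value:

  #include "llvm/CodeGen/Register.h"

  static bool isConcreteReg(llvm::Register Reg) {
    if (!Reg.isValid())
      return false;                 // the invalid (zero) register
    if (Reg.isVirtual())
      return true;                  // virtual-register namespace
    // Physical registers can be viewed as an MCRegister.
    return Reg.isPhysical() && Reg.asMCReg().isValid();
  }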
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
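A hedged usage sketch of the resource-table entry points listed above, assuming a ResourceManager RM that was already constructed for the current scheduling region; the real scheduler interleaves this with its cycle-search loop:

  // Probe whether SU can be issued in any slot of an II-cycle reservation table.
  static bool fitsSomewhere(llvm::ResourceManager &RM, llvm::SUnit &SU, int II) {
    RM.init(II);                          // size the table to II cycles
    for (int Cycle = 0; Cycle < II; ++Cycle)
      if (RM.canReserveResources(SU, Cycle))
        return true;
    return false;
  }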
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
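A simplified sketch of how one node might be placed using the SMSchedule entry points listed above (computeStart and insert); the actual scheduler also honors the late-start bound and scans the window in the direction dictated by the node order:

  static bool tryPlaceNode(llvm::SwingSchedulerDAG *DAG,
                           llvm::SMSchedule &Schedule, llvm::SUnit *SU, int II) {
    int MaxEarlyStart = 0, MinLateStart = 0;
    Schedule.computeStart(SU, &MaxEarlyStart, &MinLateStart, II, DAG);
    // Try each cycle in a window of II slots starting at the earliest legal one.
    return Schedule.insert(SU, MaxEarlyStart, MaxEarlyStart + II - 1, II);
  }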
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
typename vector_type::const_iterator iterator
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
void clear()
Completely clear the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
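A minimal sketch of SetVector's set-with-insertion-order behavior; the int elements are illustrative only:

  #include "llvm/ADT/SetVector.h"

  static unsigned setVectorDemo() {
    llvm::SetVector<int> S;
    S.insert(3);
    S.insert(1);
    S.insert(3);                 // duplicate: ignored, insert returns false
    unsigned Visited = 0;
    for (int V : S)              // iterates 3 then 1 (insertion order)
      Visited += S.contains(V);  // membership tests are set-like
    S.clear();
    return Visited;              // 2
  }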
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instructions.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
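A hedged sketch of the per-edge test behind isValidSchedule, combining the edge accessors above with the MaxLateStart arithmetic visible in the listing further up; ExpandCycle stands in for the schedule's cycle lookup and is a placeholder, not a real API:

  static bool edgeIsHonored(const llvm::SwingSchedulerDDGEdge &Edge, int II,
                            llvm::function_ref<int(llvm::SUnit *)> ExpandCycle) {
    llvm::SUnit *Src = Edge.getSrc();
    llvm::SUnit *Dst = Edge.getDst();
    if (!Src->isInstr() || !Dst->isInstr())
      return true;                        // boundary nodes impose no constraint
    int CycleSrc = ExpandCycle(Src);
    int CycleDst = ExpandCycle(Dst);
    int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency();
    return CycleSrc <= MaxLateStart;      // the source must not start too late
  }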
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
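A minimal BuildMI sketch matching the builder interface listed above; the opcode and registers are placeholders, and the instruction is assumed to take a single register source operand:

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  static llvm::MachineInstr *
  emitSingleSrcInstr(llvm::MachineBasicBlock &MBB,
                     llvm::MachineBasicBlock::iterator InsertPt,
                     const llvm::DebugLoc &DL, const llvm::TargetInstrInfo *TII,
                     unsigned Opc, llvm::Register DstReg, llvm::Register SrcReg) {
    return llvm::BuildMI(MBB, InsertPt, DL, TII->get(Opc), DstReg)
        .addReg(SrcReg)
        .getInstr();
  }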
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
@ WS_Force
Use window algorithm instead of SMS algorithm.
@ WS_On
Use window algorithm after SMS algorithm fails.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
RegState getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.