72#include "llvm/Config/llvm-config.h"
100#define DEBUG_TYPE "pipeliner"
102STATISTIC(NumTrytoPipeline,
"Number of loops that we attempt to pipeline");
103STATISTIC(NumPipelined,
"Number of loops software pipelined");
104STATISTIC(NumNodeOrderIssues,
"Number of node order issues found");
105STATISTIC(NumFailBranch,
"Pipeliner abort due to unknown branch");
106STATISTIC(NumFailLoop,
"Pipeliner abort due to unsupported loop");
107STATISTIC(NumFailPreheader,
"Pipeliner abort due to missing preheader");
108STATISTIC(NumFailLargeMaxMII,
"Pipeliner abort due to MaxMII too large");
109STATISTIC(NumFailZeroMII,
"Pipeliner abort due to zero MII");
110STATISTIC(NumFailNoSchedule,
"Pipeliner abort due to no schedule found");
111STATISTIC(NumFailZeroStage,
"Pipeliner abort due to zero stage");
112STATISTIC(NumFailLargeMaxStage,
"Pipeliner abort due to too many stages");
113STATISTIC(NumFailTooManyStores,
"Pipeliner abort due to too many stores");
117 cl::desc(
"Enable Software Pipelining"));
126 cl::desc(
"Size limit for the MII."),
132 cl::desc(
"Force pipeliner to use specified II."),
138 cl::desc(
"Maximum stages allowed in the generated scheduled."),
145 cl::desc(
"Prune dependences between unrelated Phi nodes."),
152 cl::desc(
"Prune loop carried order dependences."),
170 cl::desc(
"Instead of emitting the pipelined code, annotate instructions "
171 "with the generated schedule for feeding into the "
172 "-modulo-schedule-test pass"));
177 "Use the experimental peeling code generator for software pipelining"));
185 cl::desc(
"Limit register pressure of scheduled loop"));
190 cl::desc(
"Margin representing the unused percentage of "
191 "the register pressure limit"));
195 cl::desc(
"Use the MVE code generator for software pipelining"));
200 "pipeliner-max-num-stores",
208 cl::desc(
"Enable CopyToPhi DAG Mutation"));
213 "pipeliner-force-issue-width",
220 cl::desc(
"Set how to use window scheduling algorithm."),
222 "Turn off window algorithm."),
224 "Use window algorithm after SMS algorithm fails."),
226 "Use window algorithm instead of SMS algorithm.")));
228unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
236 "Modulo Software Pipelining",
false,
false)
278 enum class InstrTag {
287 TaggedSUnit(
SUnit *SU, InstrTag Tag)
290 InstrTag
getTag()
const {
return InstrTag(getInt()); }
294 struct LoadStoreChunk {
298 void append(
SUnit *SU);
303 std::vector<SUnit> &SUnits;
309 std::vector<BitVector> LoopCarried;
322 std::vector<TaggedSUnit> TaggedSUnits;
336 return LoopCarried[Idx];
341 std::optional<InstrTag> getInstrTag(
SUnit *SU)
const;
343 void addLoopCarriedDepenenciesForChunks(
const LoadStoreChunk &From,
344 const LoadStoreChunk &To);
355 bool PerformCheapCheck =
false);
357 void computeDependenciesAux();
388 TII =
MF->getSubtarget().getInstrInfo();
391 for (
const auto &L : *
MLI)
401bool MachinePipeliner::scheduleLoop(
MachineLoop &L) {
403 for (
const auto &InnerLoop : L)
404 Changed |= scheduleLoop(*InnerLoop);
416 setPragmaPipelineOptions(L);
417 if (!canPipelineLoop(L)) {
421 L.getStartLoc(), L.getHeader())
422 <<
"Failed to pipeline loop";
425 LI.LoopPipelinerInfo.reset();
430 if (useSwingModuloScheduler())
431 Changed = swingModuloScheduler(L);
433 if (useWindowScheduler(
Changed))
434 Changed = runWindowScheduler(L);
436 LI.LoopPipelinerInfo.reset();
440void MachinePipeliner::setPragmaPipelineOptions(
MachineLoop &L) {
445 MachineBasicBlock *LBLK =
L.getTopBlock();
458 MDNode *LoopID = TI->
getMetadata(LLVMContext::MD_loop);
459 if (LoopID ==
nullptr)
476 if (S->
getString() ==
"llvm.loop.pipeline.initiationinterval") {
478 "Pipeline initiation interval hint metadata should have two operands.");
482 }
else if (S->
getString() ==
"llvm.loop.pipeline.disable") {
495 auto It = PhiDeps.find(
Reg);
496 if (It == PhiDeps.end())
507 for (
unsigned Dep : It->second) {
522 unsigned DefReg =
MI.getOperand(0).getReg();
526 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2)
527 Ins->second.push_back(
MI.getOperand(
I).getReg());
534 for (
const auto &KV : PhiDeps) {
535 unsigned Reg = KV.first;
546bool MachinePipeliner::canPipelineLoop(
MachineLoop &L) {
547 if (
L.getNumBlocks() != 1) {
549 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
550 L.getStartLoc(),
L.getHeader())
551 <<
"Not a single basic block: "
552 <<
ore::NV(
"NumBlocks",
L.getNumBlocks());
564 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
565 L.getStartLoc(),
L.getHeader())
566 <<
"Disabled by Pragma.";
576 if (
TII->analyzeBranch(*
L.getHeader(),
LI.TBB,
LI.FBB,
LI.BrCond)) {
577 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeBranch, can NOT pipeline Loop\n");
580 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
581 L.getStartLoc(),
L.getHeader())
582 <<
"The branch can't be understood";
587 LI.LoopInductionVar =
nullptr;
588 LI.LoopCompare =
nullptr;
589 LI.LoopPipelinerInfo =
TII->analyzeLoopForPipelining(
L.getTopBlock());
590 if (!
LI.LoopPipelinerInfo) {
591 LLVM_DEBUG(
dbgs() <<
"Unable to analyzeLoop, can NOT pipeline Loop\n");
594 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
595 L.getStartLoc(),
L.getHeader())
596 <<
"The loop structure is not supported";
601 if (!
L.getLoopPreheader()) {
602 LLVM_DEBUG(
dbgs() <<
"Preheader not found, can NOT pipeline Loop\n");
605 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
606 L.getStartLoc(),
L.getHeader())
607 <<
"No loop preheader found";
612 unsigned NumStores = 0;
613 for (MachineInstr &
MI : *
L.getHeader())
618 NumFailTooManyStores++;
620 return MachineOptimizationRemarkAnalysis(
DEBUG_TYPE,
"canPipelineLoop",
621 L.getStartLoc(),
L.getHeader())
622 <<
"Too many store instructions in the loop: "
623 <<
ore::NV(
"NumStores", NumStores) <<
" > "
630 preprocessPhiNodes(*
L.getHeader());
635 MachineRegisterInfo &
MRI =
MF->getRegInfo();
639 for (MachineInstr &PI :
B.phis()) {
640 MachineOperand &DefOp = PI.getOperand(0);
642 auto *RC =
MRI.getRegClass(DefOp.
getReg());
644 for (
unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
645 MachineOperand &RegOp = PI.getOperand(i);
652 MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
669bool MachinePipeliner::swingModuloScheduler(
MachineLoop &L) {
670 assert(
L.getBlocks().size() == 1 &&
"SMS works on single blocks only.");
673 SwingSchedulerDAG SMS(
677 MachineBasicBlock *
MBB =
L.getHeader();
695 return SMS.hasNewSchedule();
709bool MachinePipeliner::runWindowScheduler(
MachineLoop &L) {
717 Context.RegClassInfo->runOnMachineFunction(*
MF);
722bool MachinePipeliner::useSwingModuloScheduler() {
727bool MachinePipeliner::useWindowScheduler(
bool Changed) {
734 "llvm.loop.pipeline.initiationinterval is set.\n");
742void SwingSchedulerDAG::setMII(
unsigned ResMII,
unsigned RecMII) {
745 else if (II_setByPragma > 0)
746 MII = II_setByPragma;
748 MII = std::max(ResMII, RecMII);
751void SwingSchedulerDAG::setMAX_II() {
754 else if (II_setByPragma > 0)
755 MAX_II = II_setByPragma;
765 updatePhiDependences();
766 Topo.InitDAGTopologicalSorting();
772 dbgs() <<
"===== Loop Carried Edges Begin =====\n";
775 dbgs() <<
"===== Loop Carried Edges End =====\n";
778 NodeSetType NodeSets;
779 findCircuits(NodeSets);
780 NodeSetType Circuits = NodeSets;
783 unsigned ResMII = calculateResMII();
784 unsigned RecMII = calculateRecMII(NodeSets);
792 setMII(ResMII, RecMII);
796 <<
" (rec=" << RecMII <<
", res=" << ResMII <<
")\n");
802 Pass.ORE->emit([&]() {
804 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
805 <<
"Invalid Minimal Initiation Interval: 0";
813 <<
", we don't pipeline large loops\n");
814 NumFailLargeMaxMII++;
815 Pass.ORE->emit([&]() {
817 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
818 <<
"Minimal Initiation Interval too large: "
819 <<
ore::NV(
"MII", (
int)MII) <<
" > "
821 <<
"Refer to -pipeliner-max-mii.";
826 computeNodeFunctions(NodeSets);
828 registerPressureFilter(NodeSets);
830 colocateNodeSets(NodeSets);
832 checkNodeSets(NodeSets);
835 for (
auto &
I : NodeSets) {
836 dbgs() <<
" Rec NodeSet ";
843 groupRemainingNodes(NodeSets);
845 removeDuplicateNodes(NodeSets);
848 for (
auto &
I : NodeSets) {
849 dbgs() <<
" NodeSet ";
854 computeNodeOrder(NodeSets);
857 checkValidNodeOrder(Circuits);
860 Scheduled = schedulePipeline(Schedule);
865 Pass.ORE->emit([&]() {
867 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
868 <<
"Unable to find schedule";
875 if (numStages == 0) {
878 Pass.ORE->emit([&]() {
880 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
881 <<
"No need to pipeline - no overlapped iterations in schedule.";
888 <<
" : too many stages, abort\n");
889 NumFailLargeMaxStage++;
890 Pass.ORE->emit([&]() {
892 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
893 <<
"Too many stages in schedule: "
894 <<
ore::NV(
"numStages", (
int)numStages) <<
" > "
896 <<
". Refer to -pipeliner-max-stages.";
901 Pass.ORE->emit([&]() {
904 <<
"Pipelined succesfully!";
909 std::vector<MachineInstr *> OrderedInsts;
913 OrderedInsts.push_back(SU->getInstr());
914 Cycles[SU->getInstr()] =
Cycle;
919 for (
auto &KV : NewMIs) {
920 Cycles[KV.first] = Cycles[KV.second];
921 Stages[KV.first] = Stages[KV.second];
922 NewInstrChanges[KV.first] = InstrChanges[
getSUnit(KV.first)];
929 "Cannot serialize a schedule with InstrChanges!");
939 LoopPipelinerInfo->isMVEExpanderSupported() &&
953 for (
auto &KV : NewMIs)
954 MF.deleteMachineInstr(KV.second);
965 assert(Phi.isPHI() &&
"Expecting a Phi.");
969 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
970 if (Phi.getOperand(i + 1).getMBB() !=
Loop)
971 InitVal = Phi.getOperand(i).getReg();
973 LoopVal = Phi.getOperand(i).getReg();
975 assert(InitVal && LoopVal &&
"Unexpected Phi structure.");
981 for (
unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
982 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
983 return Phi.getOperand(i).getReg();
992 while (!Worklist.
empty()) {
994 for (
const auto &
SI : SU->
Succs) {
997 if (Visited.
count(SuccSU))
1010 if (!getUnderlyingObjects())
1035bool SUnitWithMemInfo::getUnderlyingObjects() {
1037 if (!
MI->hasOneMemOperand())
1059 if (Src.isTriviallyDisjoint(Dst))
1066 if (PerformCheapCheck) {
1073 int64_t Offset1, Offset2;
1074 bool Offset1IsScalable, Offset2IsScalable;
1075 if (
TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
1077 TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
1080 Offset1IsScalable == Offset2IsScalable &&
1081 (
int)Offset1 < (
int)Offset2) {
1082 assert(
TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
1083 "What happened to the chain edge?");
1095 if (Src.isUnknown() || Dst.isUnknown())
1097 if (Src.MemOpValue == Dst.MemOpValue && Src.MemOpOffset <= Dst.MemOpOffset)
1108 for (
const Value *SrcObj : Src.UnderlyingObjs)
1109 for (
const Value *DstObj : Dst.UnderlyingObjs)
1117void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
1119 if (!
MI->mayLoadOrStore())
1121 (
MI->mayStore() ? Stores : Loads).emplace_back(SU);
1127 : DAG(SSD), BAA(BAA), SUnits(DAG->SUnits), N(SUnits.
size()),
1128 LoopCarried(N,
BitVector(N)), TII(TII), TRI(TRI) {}
1132 for (
auto &SU : SUnits) {
1133 auto Tagged = getInstrTag(&SU);
1138 TaggedSUnits.emplace_back(&SU, *Tagged);
1141 computeDependenciesAux();
1144std::optional<LoopCarriedOrderDepsTracker::InstrTag>
1145LoopCarriedOrderDepsTracker::getInstrTag(
SUnit *SU)
const {
1147 if (
TII->isGlobalMemoryObject(
MI))
1148 return InstrTag::Barrier;
1150 if (
MI->mayStore() ||
1151 (
MI->mayLoad() && !
MI->isDereferenceableInvariantLoad()))
1152 return InstrTag::LoadOrStore;
1154 if (
MI->mayRaiseFPException())
1155 return InstrTag::FPExceptions;
1157 return std::nullopt;
1160void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
1161 const SUnitWithMemInfo &Src,
const SUnitWithMemInfo &Dst,
1162 bool PerformCheapCheck) {
1164 if (Src.SU == Dst.SU)
1168 LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
1171void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
1172 const LoadStoreChunk &From,
const LoadStoreChunk &To) {
1174 for (
const SUnitWithMemInfo &Src : From.Loads)
1175 for (
const SUnitWithMemInfo &Dst : To.Stores)
1177 addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
1180 for (
const SUnitWithMemInfo &Src : From.Stores)
1181 for (
const SUnitWithMemInfo &Dst : To.Loads)
1182 addDependenciesBetweenSUs(Src, Dst);
1185 for (
const SUnitWithMemInfo &Src : From.Stores)
1186 for (
const SUnitWithMemInfo &Dst : To.Stores)
1187 addDependenciesBetweenSUs(Src, Dst);
1190void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
1192 for (
const auto &TSU : TaggedSUnits) {
1193 InstrTag
Tag = TSU.getTag();
1194 SUnit *SU = TSU.getPointer();
1196 case InstrTag::Barrier:
1197 Chunks.emplace_back();
1199 case InstrTag::LoadOrStore:
1200 Chunks.back().append(SU);
1202 case InstrTag::FPExceptions:
1211 for (
const LoadStoreChunk &Chunk : Chunks)
1212 addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
1224LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
1225 LoopCarriedEdges LCE;
1229 LCODTracker.computeDependencies();
1230 for (
unsigned I = 0;
I != SUnits.size();
I++)
1231 for (
const int Succ : LCODTracker.getLoopCarried(
I).set_bits())
1244void SwingSchedulerDAG::updatePhiDependences() {
1246 const TargetSubtargetInfo &
ST = MF.getSubtarget<TargetSubtargetInfo>();
1249 for (SUnit &
I : SUnits) {
1254 MachineInstr *
MI =
I.getInstr();
1256 for (
const MachineOperand &MO :
MI->operands()) {
1263 UI =
MRI.use_instr_begin(
Reg),
1264 UE =
MRI.use_instr_end();
1266 MachineInstr *
UseMI = &*UI;
1267 SUnit *SU = getSUnit(
UseMI);
1277 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1282 }
else if (MO.isUse()) {
1285 if (
DefMI ==
nullptr)
1287 SUnit *SU = getSUnit(
DefMI);
1292 ST.adjustSchedDependency(SU, 0, &
I, MO.getOperandNo(), Dep,
1299 if (SU->
NodeNum <
I.NodeNum && !
I.isPred(SU))
1308 for (
auto &PI :
I.Preds) {
1309 MachineInstr *PMI = PI.getSUnit()->getInstr();
1311 if (
I.getInstr()->isPHI()) {
1320 for (
const SDep &
D : RemoveDeps)
1327void SwingSchedulerDAG::changeDependences() {
1331 for (SUnit &
I : SUnits) {
1332 unsigned BasePos = 0, OffsetPos = 0;
1334 int64_t NewOffset = 0;
1335 if (!canUseLastOffsetValue(
I.getInstr(), BasePos, OffsetPos, NewBase,
1340 Register OrigBase =
I.getInstr()->getOperand(BasePos).getReg();
1341 MachineInstr *
DefMI =
MRI.getUniqueVRegDef(OrigBase);
1344 SUnit *DefSU = getSUnit(
DefMI);
1348 MachineInstr *LastMI =
MRI.getUniqueVRegDef(NewBase);
1351 SUnit *LastSU = getSUnit(LastMI);
1355 if (Topo.IsReachable(&
I, LastSU))
1360 for (
const SDep &
P :
I.Preds)
1361 if (
P.getSUnit() == DefSU)
1363 for (
const SDep &
D : Deps) {
1364 Topo.RemovePred(&
I,
D.getSUnit());
1369 for (
auto &
P : LastSU->
Preds)
1372 for (
const SDep &
D : Deps) {
1373 Topo.RemovePred(LastSU,
D.getSUnit());
1380 Topo.AddPred(LastSU, &
I);
1385 InstrChanges[&
I] = std::make_pair(NewBase, NewOffset);
1396 std::vector<MachineInstr *> &OrderedInsts,
1404 Stage <= LastStage; ++Stage) {
1407 Instrs[
Cycle].push_front(SU);
1414 std::deque<SUnit *> &CycleInstrs = Instrs[
Cycle];
1416 for (
SUnit *SU : CycleInstrs) {
1418 OrderedInsts.push_back(
MI);
1428struct FuncUnitSorter {
1429 const InstrItineraryData *InstrItins;
1430 const MCSubtargetInfo *STI;
1431 DenseMap<InstrStage::FuncUnits, unsigned> Resources;
1433 FuncUnitSorter(
const TargetSubtargetInfo &TSI)
1434 : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1439 unsigned minFuncUnits(
const MachineInstr *Inst,
1442 unsigned min = UINT_MAX;
1443 if (InstrItins && !InstrItins->
isEmpty()) {
1444 for (
const InstrStage &IS :
1446 InstrItins->
endStage(SchedClass))) {
1449 if (numAlternatives < min) {
1450 min = numAlternatives;
1457 const MCSchedClassDesc *SCDesc =
1464 for (
const MCWriteProcResEntry &PRE :
1467 if (!PRE.ReleaseAtCycle)
1469 const MCProcResourceDesc *ProcResource =
1471 unsigned NumUnits = ProcResource->
NumUnits;
1472 if (NumUnits < min) {
1474 F = PRE.ProcResourceIdx;
1479 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1487 void calcCriticalResources(MachineInstr &
MI) {
1488 unsigned SchedClass =
MI.getDesc().getSchedClass();
1489 if (InstrItins && !InstrItins->
isEmpty()) {
1490 for (
const InstrStage &IS :
1492 InstrItins->
endStage(SchedClass))) {
1495 Resources[FuncUnits]++;
1500 const MCSchedClassDesc *SCDesc =
1507 for (
const MCWriteProcResEntry &PRE :
1510 if (!PRE.ReleaseAtCycle)
1512 Resources[PRE.ProcResourceIdx]++;
1516 llvm_unreachable(
"Should have non-empty InstrItins or hasInstrSchedModel!");
1520 bool operator()(
const MachineInstr *IS1,
const MachineInstr *IS2)
const {
1522 unsigned MFUs1 = minFuncUnits(IS1, F1);
1523 unsigned MFUs2 = minFuncUnits(IS2, F2);
1526 return MFUs1 > MFUs2;
1531class HighRegisterPressureDetector {
1532 MachineBasicBlock *OrigMBB;
1533 const MachineRegisterInfo &
MRI;
1534 const TargetRegisterInfo *
TRI;
1536 const unsigned PSetNum;
1542 std::vector<unsigned> InitSetPressure;
1546 std::vector<unsigned> PressureSetLimit;
1548 DenseMap<MachineInstr *, RegisterOperands> ROMap;
1550 using Instr2LastUsesTy = DenseMap<MachineInstr *, SmallDenseSet<Register, 4>>;
1553 using OrderedInstsTy = std::vector<MachineInstr *>;
1554 using Instr2StageTy = DenseMap<MachineInstr *, unsigned>;
1557 static void dumpRegisterPressures(
const std::vector<unsigned> &Pressures) {
1558 if (Pressures.size() == 0) {
1562 for (
unsigned P : Pressures) {
1573 VirtRegOrUnit VRegOrUnit =
1575 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1576 for (
auto PSetIter =
MRI.getPressureSets(VRegOrUnit); PSetIter.isValid();
1578 dbgs() << *PSetIter <<
' ';
1583 void increaseRegisterPressure(std::vector<unsigned> &Pressure,
1586 VirtRegOrUnit VRegOrUnit =
1588 : VirtRegOrUnit(static_cast<MCRegUnit>(
Reg.id()));
1589 auto PSetIter =
MRI.getPressureSets(VRegOrUnit);
1590 unsigned Weight = PSetIter.getWeight();
1591 for (; PSetIter.isValid(); ++PSetIter)
1592 Pressure[*PSetIter] += Weight;
1595 void decreaseRegisterPressure(std::vector<unsigned> &Pressure,
1597 auto PSetIter =
MRI.getPressureSets(VirtRegOrUnit(
Reg));
1598 unsigned Weight = PSetIter.getWeight();
1599 for (; PSetIter.isValid(); ++PSetIter) {
1600 auto &
P = Pressure[*PSetIter];
1602 "register pressure must be greater than or equal weight");
1624 void computeLiveIn() {
1625 DenseSet<Register>
Used;
1626 for (
auto &
MI : *OrigMBB) {
1627 if (
MI.isDebugInstr())
1629 for (
auto &Use : ROMap[&
MI].
Uses) {
1632 Use.VRegOrUnit.isVirtualReg()
1633 ?
Use.VRegOrUnit.asVirtualReg()
1634 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1639 if (isReservedRegister(
Reg))
1641 if (isDefinedInThisLoop(
Reg))
1647 for (
auto LiveIn : Used)
1648 increaseRegisterPressure(InitSetPressure, LiveIn);
1652 void computePressureSetLimit(
const RegisterClassInfo &RCI) {
1653 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1668 Instr2LastUsesTy computeLastUses(
const OrderedInstsTy &OrderedInsts,
1669 Instr2StageTy &Stages)
const {
1674 DenseSet<Register> TargetRegs;
1675 const auto UpdateTargetRegs = [
this, &TargetRegs](
Register Reg) {
1676 if (isDefinedInThisLoop(
Reg))
1679 for (MachineInstr *
MI : OrderedInsts) {
1682 UpdateTargetRegs(
Reg);
1684 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1687 ?
Use.VRegOrUnit.asVirtualReg()
1689 Use.VRegOrUnit.asMCRegUnit()));
1690 UpdateTargetRegs(
Reg);
1695 const auto InstrScore = [&Stages](MachineInstr *
MI) {
1696 return Stages[
MI] +
MI->isPHI();
1699 DenseMap<Register, MachineInstr *> LastUseMI;
1701 for (
auto &Use : ROMap.
find(
MI)->getSecond().Uses) {
1704 Use.VRegOrUnit.isVirtualReg()
1705 ?
Use.VRegOrUnit.asVirtualReg()
1706 :
Register(
static_cast<unsigned>(
Use.VRegOrUnit.asMCRegUnit()));
1711 MachineInstr *Orig = Ite->second;
1712 MachineInstr *
New =
MI;
1713 if (InstrScore(Orig) < InstrScore(New))
1719 Instr2LastUsesTy LastUses;
1720 for (
auto [
Reg,
MI] : LastUseMI)
1721 LastUses[
MI].insert(
Reg);
1737 std::vector<unsigned>
1738 computeMaxSetPressure(
const OrderedInstsTy &OrderedInsts,
1739 Instr2StageTy &Stages,
1740 const unsigned StageCount)
const {
1741 using RegSetTy = SmallDenseSet<Register, 16>;
1747 auto CurSetPressure = InitSetPressure;
1748 auto MaxSetPressure = InitSetPressure;
1749 auto LastUses = computeLastUses(OrderedInsts, Stages);
1752 dbgs() <<
"Ordered instructions:\n";
1753 for (MachineInstr *
MI : OrderedInsts) {
1754 dbgs() <<
"Stage " << Stages[
MI] <<
": ";
1759 const auto InsertReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1760 VirtRegOrUnit VRegOrUnit) {
1774 increaseRegisterPressure(CurSetPressure,
Reg);
1778 const auto EraseReg = [
this, &CurSetPressure](RegSetTy &RegSet,
1784 if (!RegSet.contains(
Reg))
1789 decreaseRegisterPressure(CurSetPressure,
Reg);
1793 for (
unsigned I = 0;
I < StageCount;
I++) {
1794 for (MachineInstr *
MI : OrderedInsts) {
1795 const auto Stage = Stages[
MI];
1799 const unsigned Iter =
I - Stage;
1801 for (
auto &Def : ROMap.
find(
MI)->getSecond().Defs)
1802 InsertReg(LiveRegSets[Iter],
Def.VRegOrUnit);
1804 for (
auto LastUse : LastUses[
MI]) {
1807 EraseReg(LiveRegSets[Iter - 1], LastUse);
1809 EraseReg(LiveRegSets[Iter], LastUse);
1813 for (
unsigned PSet = 0; PSet < PSetNum; PSet++)
1814 MaxSetPressure[PSet] =
1815 std::max(MaxSetPressure[PSet], CurSetPressure[PSet]);
1818 dbgs() <<
"CurSetPressure=";
1819 dumpRegisterPressures(CurSetPressure);
1820 dbgs() <<
" iter=" << Iter <<
" stage=" << Stage <<
":";
1826 return MaxSetPressure;
1830 HighRegisterPressureDetector(MachineBasicBlock *OrigMBB,
1831 const MachineFunction &MF)
1832 : OrigMBB(OrigMBB),
MRI(MF.getRegInfo()),
1833 TRI(MF.getSubtarget().getRegisterInfo()),
1834 PSetNum(
TRI->getNumRegPressureSets()), InitSetPressure(PSetNum, 0),
1835 PressureSetLimit(PSetNum, 0) {}
1839 void init(
const RegisterClassInfo &RCI) {
1840 for (MachineInstr &
MI : *OrigMBB) {
1841 if (
MI.isDebugInstr())
1843 ROMap[&
MI].collect(
MI, *
TRI,
MRI,
false,
true);
1847 computePressureSetLimit(RCI);
1852 bool detect(
const SwingSchedulerDAG *SSD, SMSchedule &Schedule,
1853 const unsigned MaxStage)
const {
1855 "the percentage of the margin must be between 0 to 100");
1857 OrderedInstsTy OrderedInsts;
1858 Instr2StageTy Stages;
1860 const auto MaxSetPressure =
1861 computeMaxSetPressure(OrderedInsts, Stages, MaxStage + 1);
1864 dbgs() <<
"Dump MaxSetPressure:\n";
1865 for (
unsigned I = 0;
I < MaxSetPressure.size();
I++) {
1866 dbgs() <<
format(
"MaxSetPressure[%d]=%d\n",
I, MaxSetPressure[
I]);
1871 for (
unsigned PSet = 0; PSet < PSetNum; PSet++) {
1872 unsigned Limit = PressureSetLimit[PSet];
1875 <<
" Margin=" << Margin <<
"\n");
1876 if (Limit < MaxSetPressure[PSet] + Margin) {
1879 <<
"Rejected the schedule because of too high register pressure\n");
1895unsigned SwingSchedulerDAG::calculateResMII() {
1897 ResourceManager
RM(&MF.getSubtarget(),
this);
1898 return RM.calculateResMII();
1907unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1908 unsigned RecMII = 0;
1910 for (NodeSet &Nodes : NodeSets) {
1914 unsigned Delay = Nodes.getLatency();
1915 unsigned Distance = 1;
1918 unsigned CurMII = (Delay + Distance - 1) / Distance;
1919 Nodes.setRecMII(CurMII);
1920 if (CurMII > RecMII)
1928void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1929 SwingSchedulerDAG *DAG) {
1930 BitVector
Added(SUnits.size());
1931 DenseMap<int, int> OutputDeps;
1932 for (
int i = 0, e = SUnits.size(); i != e; ++i) {
1935 for (
auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
1938 if (OE.isOutputDep()) {
1939 int N = OE.getDst()->NodeNum;
1941 auto Dep = OutputDeps.
find(BackEdge);
1942 if (Dep != OutputDeps.
end()) {
1943 BackEdge = Dep->second;
1944 OutputDeps.
erase(Dep);
1946 OutputDeps[
N] = BackEdge;
1949 if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
1961 int N = OE.getDst()->NodeNum;
1963 AdjK[i].push_back(
N);
1969 for (
auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
1970 SUnit *Src =
IE.getSrc();
1971 SUnit *Dst =
IE.getDst();
1974 if (
IE.isOrderDep() && Src->getInstr()->mayLoad()) {
1975 int N = Src->NodeNum;
1977 AdjK[i].push_back(
N);
1984 for (
auto &OD : OutputDeps)
1985 if (!
Added.test(OD.second)) {
1986 AdjK[OD.first].push_back(OD.second);
1987 Added.set(OD.second);
1993bool SwingSchedulerDAG::Circuits::circuit(
int V,
int S, NodeSetType &NodeSets,
1994 const SwingSchedulerDAG *DAG,
1996 SUnit *SV = &SUnits[
V];
2001 for (
auto W : AdjK[V]) {
2002 if (NumPaths > MaxPaths)
2013 if (!Blocked.test(W)) {
2014 if (circuit(W, S, NodeSets, DAG,
2015 Node2Idx->at(W) < Node2Idx->at(V) ?
true : HasBackedge))
2023 for (
auto W : AdjK[V]) {
2034void SwingSchedulerDAG::Circuits::unblock(
int U) {
2036 SmallPtrSet<SUnit *, 4> &BU =
B[
U];
2037 while (!BU.
empty()) {
2038 SmallPtrSet<SUnit *, 4>::iterator
SI = BU.
begin();
2039 assert(SI != BU.
end() &&
"Invalid B set.");
2042 if (Blocked.test(
W->NodeNum))
2043 unblock(
W->NodeNum);
2049void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
2050 Circuits Cir(SUnits, Topo);
2052 Cir.createAdjacencyStructure(
this);
2053 for (
int I = 0,
E = SUnits.size();
I !=
E; ++
I) {
2055 Cir.circuit(
I,
I, NodeSets,
this);
2077void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
2078 for (SUnit &SU : DAG->
SUnits) {
2088 for (
auto &Dep : SU.
Preds) {
2089 SUnit *TmpSU = Dep.getSUnit();
2090 MachineInstr *TmpMI = TmpSU->
getInstr();
2101 if (PHISUs.
size() == 0 || SrcSUs.
size() == 0)
2109 for (
auto &Dep : PHISUs[Index]->Succs) {
2113 SUnit *TmpSU = Dep.getSUnit();
2114 MachineInstr *TmpMI = TmpSU->
getInstr();
2123 if (UseSUs.
size() == 0)
2128 for (
auto *
I : UseSUs) {
2129 for (
auto *Src : SrcSUs) {
2145void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
2146 ScheduleInfo.resize(SUnits.size());
2149 for (
int I : Topo) {
2150 const SUnit &SU = SUnits[
I];
2157 for (
int I : Topo) {
2159 int zeroLatencyDepth = 0;
2160 SUnit *SU = &SUnits[
I];
2161 for (
const auto &IE : DDG->getInEdges(SU)) {
2162 SUnit *Pred =
IE.getSrc();
2163 if (
IE.getLatency() == 0)
2165 std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
2166 if (
IE.ignoreDependence(
true))
2168 asap = std::max(asap, (
int)(getASAP(Pred) +
IE.getLatency() -
2169 IE.getDistance() * MII));
2171 maxASAP = std::max(maxASAP, asap);
2172 ScheduleInfo[
I].ASAP = asap;
2173 ScheduleInfo[
I].ZeroLatencyDepth = zeroLatencyDepth;
2179 int zeroLatencyHeight = 0;
2180 SUnit *SU = &SUnits[
I];
2181 for (
const auto &OE : DDG->getOutEdges(SU)) {
2182 SUnit *Succ = OE.getDst();
2185 if (OE.getLatency() == 0)
2187 std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
2188 if (OE.ignoreDependence(
true))
2190 alap = std::min(alap, (
int)(getALAP(Succ) - OE.getLatency() +
2191 OE.getDistance() * MII));
2194 ScheduleInfo[
I].ALAP = alap;
2195 ScheduleInfo[
I].ZeroLatencyHeight = zeroLatencyHeight;
2199 for (NodeSet &
I : NodeSets)
2200 I.computeNodeSetInfo(
this);
2203 for (
unsigned i = 0; i < SUnits.size(); i++) {
2204 dbgs() <<
"\tNode " << i <<
":\n";
2205 dbgs() <<
"\t ASAP = " << getASAP(&SUnits[i]) <<
"\n";
2206 dbgs() <<
"\t ALAP = " << getALAP(&SUnits[i]) <<
"\n";
2207 dbgs() <<
"\t MOV = " << getMOV(&SUnits[i]) <<
"\n";
2208 dbgs() <<
"\t D = " << getDepth(&SUnits[i]) <<
"\n";
2209 dbgs() <<
"\t H = " << getHeight(&SUnits[i]) <<
"\n";
2210 dbgs() <<
"\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) <<
"\n";
2211 dbgs() <<
"\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) <<
"\n";
2226 SUnit *PredSU = IE.getSrc();
2227 if (S && S->count(PredSU) == 0)
2229 if (IE.ignoreDependence(
true))
2240 SUnit *SuccSU = OE.getDst();
2241 if (!OE.isAntiDep())
2243 if (S && S->count(SuccSU) == 0)
2249 return !Preds.
empty();
2262 SUnit *SuccSU = OE.getDst();
2263 if (S && S->count(SuccSU) == 0)
2265 if (OE.ignoreDependence(
false))
2276 SUnit *PredSU = IE.getSrc();
2277 if (!IE.isAntiDep())
2279 if (S && S->count(PredSU) == 0)
2285 return !Succs.
empty();
2301 if (!Visited.
insert(Cur).second)
2302 return Path.contains(Cur);
2303 bool FoundPath =
false;
2305 if (!OE.ignoreDependence(
false))
2307 computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
2309 if (IE.isAntiDep() && IE.getDistance() == 0)
2311 computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
2326 for (
SUnit *SU : NS) {
2332 if (
Reg.isVirtual())
2334 else if (
MRI.isAllocatable(
Reg))
2335 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2339 for (
SUnit *SU : NS)
2343 if (
Reg.isVirtual()) {
2347 }
else if (
MRI.isAllocatable(
Reg)) {
2348 for (MCRegUnit Unit :
TRI->regunits(
Reg.asMCReg()))
2359void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
2360 for (
auto &NS : NodeSets) {
2364 IntervalPressure RecRegPressure;
2365 RegPressureTracker RecRPTracker(RecRegPressure);
2366 RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(),
false,
true);
2368 RecRPTracker.closeBottom();
2370 std::vector<SUnit *> SUnits(NS.begin(), NS.end());
2371 llvm::sort(SUnits, [](
const SUnit *
A,
const SUnit *
B) {
2372 return A->NodeNum >
B->NodeNum;
2375 for (
auto &SU : SUnits) {
2381 RecRPTracker.setPos(std::next(CurInstI));
2383 RegPressureDelta RPDelta;
2385 RecRPTracker.getMaxUpwardPressureDelta(SU->
getInstr(),
nullptr, RPDelta,
2390 dbgs() <<
"Excess register pressure: SU(" << SU->
NodeNum <<
") "
2393 NS.setExceedPressure(SU);
2396 RecRPTracker.recede();
2403void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2404 unsigned Colocate = 0;
2405 for (
int i = 0, e = NodeSets.size(); i < e; ++i) {
2407 SmallSetVector<SUnit *, 8>
S1;
2410 for (
int j = i + 1;
j <
e; ++
j) {
2414 SmallSetVector<SUnit *, 8> S2;
2431void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2436 for (
auto &NS : NodeSets) {
2437 if (NS.getRecMII() > 2)
2439 if (NS.getMaxDepth() > MII)
2448void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2449 SetVector<SUnit *> NodesAdded;
2450 SmallPtrSet<SUnit *, 8> Visited;
2453 for (NodeSet &
I : NodeSets) {
2454 SmallSetVector<SUnit *, 8>
N;
2457 SetVector<SUnit *>
Path;
2458 for (SUnit *NI :
N) {
2460 computePath(NI, Path, NodesAdded,
I, Visited, DDG.get());
2467 if (
succ_L(NodesAdded,
N, DDG.get())) {
2468 SetVector<SUnit *>
Path;
2469 for (SUnit *NI :
N) {
2471 computePath(NI, Path,
I, NodesAdded, Visited, DDG.get());
2482 SmallSetVector<SUnit *, 8>
N;
2483 if (
succ_L(NodesAdded,
N, DDG.get()))
2485 addConnectedNodes(
I, NewSet, NodesAdded);
2486 if (!NewSet.
empty())
2487 NodeSets.push_back(NewSet);
2492 if (
pred_L(NodesAdded,
N, DDG.get()))
2494 addConnectedNodes(
I, NewSet, NodesAdded);
2495 if (!NewSet.
empty())
2496 NodeSets.push_back(NewSet);
2500 for (SUnit &SU : SUnits) {
2501 if (NodesAdded.
count(&SU) == 0) {
2503 addConnectedNodes(&SU, NewSet, NodesAdded);
2504 if (!NewSet.
empty())
2505 NodeSets.push_back(NewSet);
2511void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2512 SetVector<SUnit *> &NodesAdded) {
2515 for (
auto &OE : DDG->getOutEdges(SU)) {
2517 if (!OE.isArtificial() && !
Successor->isBoundaryNode() &&
2519 addConnectedNodes(
Successor, NewSet, NodesAdded);
2521 for (
auto &IE : DDG->getInEdges(SU)) {
2522 SUnit *Predecessor =
IE.getSrc();
2523 if (!
IE.isArtificial() && NodesAdded.
count(Predecessor) == 0)
2524 addConnectedNodes(Predecessor, NewSet, NodesAdded);
2533 for (
SUnit *SU : Set1) {
2534 if (Set2.
count(SU) != 0)
2537 return !Result.empty();
2541void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2542 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2545 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2550 for (SUnit *SU : *J)
2562void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2563 for (NodeSetType::iterator
I = NodeSets.begin(),
E = NodeSets.end();
I !=
E;
2565 for (NodeSetType::iterator J =
I + 1; J !=
E;) {
2566 J->remove_if([&](SUnit *SUJ) {
return I->count(SUJ); });
2581void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2582 SmallSetVector<SUnit *, 8>
R;
2585 for (
auto &Nodes : NodeSets) {
2588 SmallSetVector<SUnit *, 8>
N;
2603 }
else if (NodeSets.size() == 1) {
2604 for (
const auto &
N : Nodes)
2605 if (
N->Succs.size() == 0)
2611 SUnit *maxASAP =
nullptr;
2612 for (SUnit *SU : Nodes) {
2613 if (maxASAP ==
nullptr || getASAP(SU) > getASAP(maxASAP) ||
2614 (getASAP(SU) == getASAP(maxASAP) && SU->
NodeNum > maxASAP->
NodeNum))
2622 while (!
R.empty()) {
2623 if (Order == TopDown) {
2627 while (!
R.empty()) {
2628 SUnit *maxHeight =
nullptr;
2629 for (SUnit *
I : R) {
2630 if (maxHeight ==
nullptr || getHeight(
I) > getHeight(maxHeight))
2632 else if (getHeight(
I) == getHeight(maxHeight) &&
2633 getZeroLatencyHeight(
I) > getZeroLatencyHeight(maxHeight))
2635 else if (getHeight(
I) == getHeight(maxHeight) &&
2636 getZeroLatencyHeight(
I) ==
2637 getZeroLatencyHeight(maxHeight) &&
2638 getMOV(
I) < getMOV(maxHeight))
2643 R.remove(maxHeight);
2644 for (
const auto &OE : DDG->getOutEdges(maxHeight)) {
2645 SUnit *SU = OE.getDst();
2646 if (Nodes.count(SU) == 0)
2650 if (OE.ignoreDependence(
false))
2659 for (
const auto &IE : DDG->getInEdges(maxHeight)) {
2660 SUnit *SU =
IE.getSrc();
2661 if (!
IE.isAntiDep())
2663 if (Nodes.count(SU) == 0)
2672 SmallSetVector<SUnit *, 8>
N;
2679 while (!
R.empty()) {
2680 SUnit *maxDepth =
nullptr;
2681 for (SUnit *
I : R) {
2682 if (maxDepth ==
nullptr || getDepth(
I) > getDepth(maxDepth))
2684 else if (getDepth(
I) == getDepth(maxDepth) &&
2685 getZeroLatencyDepth(
I) > getZeroLatencyDepth(maxDepth))
2687 else if (getDepth(
I) == getDepth(maxDepth) &&
2688 getZeroLatencyDepth(
I) == getZeroLatencyDepth(maxDepth) &&
2689 getMOV(
I) < getMOV(maxDepth))
2695 if (Nodes.isExceedSU(maxDepth)) {
2698 R.insert(Nodes.getNode(0));
2701 for (
const auto &IE : DDG->getInEdges(maxDepth)) {
2702 SUnit *SU =
IE.getSrc();
2703 if (Nodes.count(SU) == 0)
2714 for (
const auto &OE : DDG->getOutEdges(maxDepth)) {
2715 SUnit *SU = OE.getDst();
2716 if (!OE.isAntiDep())
2718 if (Nodes.count(SU) == 0)
2727 SmallSetVector<SUnit *, 8>
N;
2736 dbgs() <<
"Node order: ";
2738 dbgs() <<
" " <<
I->NodeNum <<
" ";
2745bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2752 bool scheduleFound =
false;
2753 std::unique_ptr<HighRegisterPressureDetector> HRPDetector;
2756 std::make_unique<HighRegisterPressureDetector>(Loop.getHeader(), MF);
2757 HRPDetector->init(RegClassInfo);
2760 for (
unsigned II = MII;
II <= MAX_II && !scheduleFound; ++
II) {
2772 int EarlyStart = INT_MIN;
2773 int LateStart = INT_MAX;
2782 dbgs() <<
format(
"\tes: %8x ls: %8x\n", EarlyStart, LateStart));
2784 if (EarlyStart > LateStart)
2785 scheduleFound =
false;
2786 else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
2788 Schedule.
insert(SU, EarlyStart, EarlyStart + (
int)
II - 1,
II);
2789 else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
2791 Schedule.
insert(SU, LateStart, LateStart - (
int)
II + 1,
II);
2792 else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2793 LateStart = std::min(LateStart, EarlyStart + (
int)
II - 1);
2802 scheduleFound = Schedule.
insert(SU, LateStart, EarlyStart,
II);
2804 scheduleFound = Schedule.
insert(SU, EarlyStart, LateStart,
II);
2807 scheduleFound = Schedule.
insert(SU, FirstCycle + getASAP(SU),
2808 FirstCycle + getASAP(SU) +
II - 1,
II);
2816 scheduleFound =
false;
2820 dbgs() <<
"\tCan't schedule\n";
2822 }
while (++NI != NE && scheduleFound);
2827 scheduleFound = DDG->isValidSchedule(Schedule);
2849 if (scheduleFound) {
2850 scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*
this, Schedule);
2855 if (scheduleFound) {
2857 Pass.ORE->emit([&]() {
2858 return MachineOptimizationRemarkAnalysis(
2859 DEBUG_TYPE,
"schedule", Loop.getStartLoc(), Loop.getHeader())
2860 <<
"Schedule found with Initiation Interval: "
2862 <<
", MaxStageCount: "
2876 if (!
Reg.isVirtual())
2878 if (
MRI.getVRegDef(
Reg)->getParent() !=
MI.getParent())
2890 if (!
Op.isReg() || !
Op.getReg().isVirtual())
2918 if (Def->getParent() != LoopBB)
2921 if (Def->isCopy()) {
2923 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
2925 CurReg = Def->getOperand(1).getReg();
2926 }
else if (Def->isPHI()) {
2932 }
else if (
TII->getIncrementValue(*Def,
Value)) {
2940 bool OffsetIsScalable;
2941 if (
TII->getMemOperandWithOffset(*Def, BaseOp,
Offset, OffsetIsScalable,
2944 CurReg = BaseOp->
getReg();
2956 if (CurReg == OrgReg)
2968bool SwingSchedulerDAG::computeDelta(
const MachineInstr &
MI,
int &Delta)
const {
2969 const TargetRegisterInfo *
TRI = MF.getSubtarget().getRegisterInfo();
2970 const MachineOperand *BaseOp;
2972 bool OffsetIsScalable;
2973 if (!
TII->getMemOperandWithOffset(
MI, BaseOp,
Offset, OffsetIsScalable,
TRI))
2977 if (OffsetIsScalable)
2980 if (!BaseOp->
isReg())
2993bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *
MI,
2995 unsigned &OffsetPos,
3001 unsigned BasePosLd, OffsetPosLd;
3007 MachineRegisterInfo &
MRI =
MI->getMF()->getRegInfo();
3008 MachineInstr *
Phi =
MRI.getVRegDef(BaseReg);
3009 if (!Phi || !
Phi->isPHI())
3017 MachineInstr *PrevDef =
MRI.getVRegDef(PrevReg);
3018 if (!PrevDef || PrevDef ==
MI)
3024 unsigned BasePos1 = 0, OffsetPos1 = 0;
3030 int64_t LoadOffset =
MI->getOperand(OffsetPosLd).getImm();
3032 MachineInstr *NewMI = MF.CloneMachineInstr(
MI);
3035 MF.deleteMachineInstr(NewMI);
3040 BasePos = BasePosLd;
3041 OffsetPos = OffsetPosLd;
3053 InstrChanges.find(SU);
3054 if (It != InstrChanges.
end()) {
3055 std::pair<Register, int64_t> RegAndOffset = It->second;
3056 unsigned BasePos, OffsetPos;
3057 if (!
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3059 Register BaseReg =
MI->getOperand(BasePos).getReg();
3065 if (BaseStageNum < DefStageNum) {
3067 int OffsetDiff = DefStageNum - BaseStageNum;
3068 if (DefCycleNum < BaseCycleNum) {
3074 MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3089 while (Def->isPHI()) {
3090 if (!Visited.
insert(Def).second)
3092 for (
unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3093 if (Def->getOperand(i + 1).getMBB() == BB) {
3094 Def =
MRI.getVRegDef(Def->getOperand(i).getReg());
3105 int DeltaB, DeltaO, Delta;
3112 int64_t OffsetB, OffsetO;
3113 bool OffsetBIsScalable, OffsetOIsScalable;
3115 if (!
TII->getMemOperandWithOffset(*BaseMI, BaseOpB, OffsetB,
3116 OffsetBIsScalable,
TRI) ||
3117 !
TII->getMemOperandWithOffset(*OtherMI, BaseOpO, OffsetO,
3118 OffsetOIsScalable,
TRI))
3121 if (OffsetBIsScalable || OffsetOIsScalable)
3131 if (!RegB.
isVirtual() || !RegO.isVirtual())
3136 if (!DefB || !DefO || !DefB->
isPHI() || !DefO->
isPHI())
3161 dbgs() <<
"Overlap check:\n";
3162 dbgs() <<
" BaseMI: ";
3164 dbgs() <<
" Base + " << OffsetB <<
" + I * " << Delta
3165 <<
", Len: " << AccessSizeB.
getValue() <<
"\n";
3166 dbgs() <<
" OtherMI: ";
3168 dbgs() <<
" Base + " << OffsetO <<
" + I * " << Delta
3169 <<
", Len: " << AccessSizeO.
getValue() <<
"\n";
3177 int64_t BaseMinAddr = OffsetB;
3178 int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.
getValue() - 1;
3179 if (BaseMinAddr > OhterNextIterMaxAddr) {
3184 int64_t BaseMaxAddr = OffsetB + AccessSizeB.
getValue() - 1;
3185 int64_t OtherNextIterMinAddr = OffsetO + Delta;
3186 if (BaseMaxAddr < OtherNextIterMinAddr) {
3200 if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
3201 Edge.getDst()->isBoundaryNode())
3207 if (Edge.isOutputDep())
3212 assert(
SI !=
nullptr && DI !=
nullptr &&
"Expecting SUnit with an MI.");
3229void SwingSchedulerDAG::postProcessDAG() {
3230 for (
auto &M : Mutations)
3240 bool forward =
true;
3242 dbgs() <<
"Trying to insert node between " << StartCycle <<
" and "
3243 << EndCycle <<
" II: " <<
II <<
"\n";
3245 if (StartCycle > EndCycle)
3249 int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3250 for (
int curCycle = StartCycle; curCycle != termCycle;
3251 forward ? ++curCycle : --curCycle) {
3254 ProcItinResources.canReserveResources(*SU, curCycle)) {
3256 dbgs() <<
"\tinsert at cycle " << curCycle <<
" ";
3261 ProcItinResources.reserveResources(*SU, curCycle);
3262 ScheduledInstrs[curCycle].push_back(SU);
3263 InstrToCycle.insert(std::make_pair(SU, curCycle));
3264 if (curCycle > LastCycle)
3265 LastCycle = curCycle;
3266 if (curCycle < FirstCycle)
3267 FirstCycle = curCycle;
3271 dbgs() <<
"\tfailed to insert at cycle " << curCycle <<
" ";
3284 int EarlyCycle = INT_MAX;
3285 while (!Worklist.
empty()) {
3288 if (Visited.
count(PrevSU))
3290 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3291 if (it == InstrToCycle.end())
3293 EarlyCycle = std::min(EarlyCycle, it->second);
3294 for (
const auto &IE : DDG->
getInEdges(PrevSU))
3295 if (IE.isOrderDep() || IE.isOutputDep())
3308 int LateCycle = INT_MIN;
3309 while (!Worklist.
empty()) {
3314 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3315 if (it == InstrToCycle.end())
3317 LateCycle = std::max(LateCycle, it->second);
3319 if (OE.isOrderDep() || OE.isOutputDep())
3330 for (
auto &
P : SU->
Preds)
3331 if (
P.getKind() ==
SDep::Anti &&
P.getSUnit()->getInstr()->isPHI())
3332 for (
auto &S :
P.getSUnit()->Succs)
3333 if (S.getKind() ==
SDep::Data && S.getSUnit()->getInstr()->isPHI())
3334 return P.getSUnit();
3347 for (
int cycle =
getFirstCycle(); cycle <= LastCycle; ++cycle) {
3350 if (IE.getSrc() ==
I) {
3355 *MinLateStart = std::min(*MinLateStart, End);
3357 int EarlyStart = cycle + IE.getLatency() - IE.getDistance() *
II;
3358 *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3363 if (OE.getDst() ==
I) {
3368 *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
3370 int LateStart = cycle - OE.getLatency() + OE.getDistance() *
II;
3371 *MinLateStart = std::min(*MinLateStart, LateStart);
3376 for (
const auto &Dep : SU->
Preds) {
3379 if (BE && Dep.getSUnit() == BE && !SU->
getInstr()->
isPHI() &&
3381 *MinLateStart = std::min(*MinLateStart, cycle);
3391 std::deque<SUnit *> &Insts)
const {
3393 bool OrderBeforeUse =
false;
3394 bool OrderAfterDef =
false;
3395 bool OrderBeforeDef =
false;
3396 unsigned MoveDef = 0;
3397 unsigned MoveUse = 0;
3402 for (std::deque<SUnit *>::iterator
I = Insts.begin(), E = Insts.end();
I != E;
3405 if (!MO.isReg() || !MO.getReg().isVirtual())
3409 unsigned BasePos, OffsetPos;
3410 if (ST.getInstrInfo()->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos))
3411 if (
MI->getOperand(BasePos).getReg() == Reg)
3415 std::tie(Reads, Writes) =
3416 (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3418 OrderBeforeUse =
true;
3423 OrderAfterDef =
true;
3425 }
else if (MO.isUse() && Writes &&
stageScheduled(*
I) == StageInst1) {
3427 OrderBeforeUse =
true;
3431 OrderAfterDef =
true;
3435 OrderBeforeUse =
true;
3439 OrderAfterDef =
true;
3444 OrderBeforeUse =
true;
3450 OrderBeforeDef =
true;
3458 if (OE.getDst() != *
I)
3461 OrderBeforeUse =
true;
3468 else if ((OE.isAntiDep() || OE.isOutputDep()) &&
3470 OrderBeforeUse =
true;
3471 if ((MoveUse == 0) || (Pos < MoveUse))
3476 if (IE.getSrc() != *
I)
3478 if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
3480 OrderAfterDef =
true;
3487 if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3488 OrderBeforeUse =
false;
3493 OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3497 if (OrderBeforeUse && OrderAfterDef) {
3498 SUnit *UseSU = Insts.at(MoveUse);
3499 SUnit *DefSU = Insts.at(MoveDef);
3500 if (MoveUse > MoveDef) {
3501 Insts.erase(Insts.begin() + MoveUse);
3502 Insts.erase(Insts.begin() + MoveDef);
3504 Insts.erase(Insts.begin() + MoveDef);
3505 Insts.erase(Insts.begin() + MoveUse);
3515 Insts.push_front(SU);
3517 Insts.push_back(SU);
3525 assert(Phi.isPHI() &&
"Expecting a Phi.");
3532 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3540 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3558 if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3564 if (DMO.getReg() == LoopReg)
3575 if (InstrToCycle.count(IE.getSrc()))
3586 for (
auto &SU : SSD->
SUnits)
3591 while (!Worklist.
empty()) {
3593 if (DoNotPipeline.
count(SU))
3596 DoNotPipeline.
insert(SU);
3603 if (OE.getDistance() == 1)
3606 return DoNotPipeline;
3615 int NewLastCycle = INT_MIN;
3620 NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
3627 if (IE.getDistance() == 0)
3628 NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
3633 if (OE.getDistance() == 1)
3634 NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
3636 int OldCycle = InstrToCycle[&SU];
3637 if (OldCycle != NewCycle) {
3638 InstrToCycle[&SU] = NewCycle;
3643 <<
") is not pipelined; moving from cycle " << OldCycle
3644 <<
" to " << NewCycle <<
" Instr:" << *SU.
getInstr());
3669 if (FirstCycle + InitiationInterval <= NewCycle)
3672 NewLastCycle = std::max(NewLastCycle, NewCycle);
3674 LastCycle = NewLastCycle;
3691 int CycleDef = InstrToCycle[&SU];
3692 assert(StageDef != -1 &&
"Instruction should have been scheduled.");
3694 SUnit *Dst = OE.getDst();
3695 if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
3696 if (OE.getReg().isPhysical()) {
3699 if (InstrToCycle[Dst] <= CycleDef)
3717void SwingSchedulerDAG::checkValidNodeOrder(
const NodeSetType &Circuits)
const {
3720 typedef std::pair<SUnit *, unsigned> UnitIndex;
3721 std::vector<UnitIndex> Indices(
NodeOrder.size(), std::make_pair(
nullptr, 0));
3723 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i)
3724 Indices.push_back(std::make_pair(
NodeOrder[i], i));
3726 auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3727 return std::get<0>(i1) < std::get<0>(i2);
3740 for (
unsigned i = 0, s =
NodeOrder.size(); i < s; ++i) {
3744 bool PredBefore =
false;
3745 bool SuccBefore =
false;
3752 for (
const auto &IE : DDG->getInEdges(SU)) {
3753 SUnit *PredSU = IE.getSrc();
3754 unsigned PredIndex = std::get<1>(
3763 for (
const auto &OE : DDG->getOutEdges(SU)) {
3764 SUnit *SuccSU = OE.getDst();
3770 unsigned SuccIndex = std::get<1>(
3783 Circuits, [SU](
const NodeSet &Circuit) {
return Circuit.
count(SU); });
3788 NumNodeOrderIssues++;
3792 <<
" are scheduled before node " << SU->
NodeNum
3799 dbgs() <<
"Invalid node order found!\n";
3812 for (
SUnit *SU : Instrs) {
3814 for (
unsigned i = 0, e =
MI->getNumOperands(); i < e; ++i) {
3822 InstrChanges.find(SU);
3823 if (It != InstrChanges.
end()) {
3824 unsigned BasePos, OffsetPos;
3826 if (
TII->getBaseAndOffsetPosition(*
MI, BasePos, OffsetPos)) {
3830 MI->getOperand(OffsetPos).getImm() - It->second.second;
3843 unsigned TiedUseIdx = 0;
3844 if (
MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3846 OverlapReg =
MI->getOperand(TiedUseIdx).getReg();
3848 NewBaseReg =
MI->getOperand(i).getReg();
3857 const std::deque<SUnit *> &Instrs)
const {
3858 std::deque<SUnit *> NewOrderPhi;
3859 for (
SUnit *SU : Instrs) {
3861 NewOrderPhi.push_back(SU);
3863 std::deque<SUnit *> NewOrderI;
3864 for (
SUnit *SU : Instrs) {
3880 std::deque<SUnit *> &cycleInstrs =
3881 ScheduledInstrs[cycle + (stage * InitiationInterval)];
3883 ScheduledInstrs[cycle].push_front(SU);
3889 for (
int cycle =
getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3890 ScheduledInstrs.erase(cycle);
3900 std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[
Cycle];
3909 os <<
"Num nodes " <<
size() <<
" rec " << RecMII <<
" mov " << MaxMOV
3910 <<
" depth " << MaxDepth <<
" col " << Colocate <<
"\n";
3911 for (
const auto &
I : Nodes)
3912 os <<
" SU(" <<
I->NodeNum <<
") " << *(
I->getInstr());
3916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3923 for (
SUnit *CI : cycleInstrs->second) {
3925 os <<
"(" << CI->
NodeNum <<
") ";
3936void ResourceManager::dumpMRT()
const {
3940 std::stringstream SS;
3942 SS << std::setw(4) <<
"Slot";
3943 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3944 SS << std::setw(3) <<
I;
3945 SS << std::setw(7) <<
"#Mops"
3947 for (
int Slot = 0; Slot < InitiationInterval; ++Slot) {
3948 SS << std::setw(4) << Slot;
3949 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I)
3950 SS << std::setw(3) << MRT[Slot][
I];
3951 SS << std::setw(7) << NumScheduledMops[Slot] <<
"\n";
3960 unsigned ProcResourceID = 0;
3964 assert(SM.getNumProcResourceKinds() < 64 &&
3965 "Too many kinds of resources, unsupported");
3968 Masks.
resize(SM.getNumProcResourceKinds());
3969 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3971 if (
Desc.SubUnitsIdxBegin)
3973 Masks[
I] = 1ULL << ProcResourceID;
3977 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3979 if (!
Desc.SubUnitsIdxBegin)
3981 Masks[
I] = 1ULL << ProcResourceID;
3982 for (
unsigned U = 0; U <
Desc.NumUnits; ++U)
3983 Masks[
I] |= Masks[
Desc.SubUnitsIdxBegin[U]];
3988 dbgs() <<
"ProcResourceDesc:\n";
3989 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
3991 dbgs() <<
format(
" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3992 ProcResource->
Name,
I, Masks[
I],
3995 dbgs() <<
" -----------------\n";
4003 dbgs() <<
"canReserveResources:\n";
4006 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4012 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4018 reserveResources(SCDesc,
Cycle);
4019 bool Result = !isOverbooked();
4020 unreserveResources(SCDesc,
Cycle);
4029 dbgs() <<
"reserveResources:\n";
4032 return DFAResources[positiveModulo(
Cycle, InitiationInterval)]
4038 dbgs() <<
"No valid Schedule Class Desc for schedClass!\n";
4044 reserveResources(SCDesc,
Cycle);
4049 dbgs() <<
"reserveResources: done!\n\n";
4060 ++MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4063 ++NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4072 --MRT[positiveModulo(
C, InitiationInterval)][PRE.ProcResourceIdx];
4075 --NumScheduledMops[positiveModulo(
C, InitiationInterval)];
4078bool ResourceManager::isOverbooked()
const {
4080 for (
int Slot = 0;
Slot < InitiationInterval; ++
Slot) {
4081 for (
unsigned I = 1,
E = SM.getNumProcResourceKinds();
I <
E; ++
I) {
4082 const MCProcResourceDesc *
Desc = SM.getProcResource(
I);
4083 if (MRT[Slot][
I] >
Desc->NumUnits)
4086 if (NumScheduledMops[Slot] > IssueWidth)
4092int ResourceManager::calculateResMIIDFA()
const {
4097 FuncUnitSorter FUS = FuncUnitSorter(*ST);
4098 for (SUnit &SU : DAG->
SUnits)
4099 FUS.calcCriticalResources(*SU.
getInstr());
4100 PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
4103 for (SUnit &SU : DAG->
SUnits)
4110 while (!FuncUnitOrder.empty()) {
4111 MachineInstr *
MI = FuncUnitOrder.top();
4112 FuncUnitOrder.pop();
4113 if (
TII->isZeroCost(
MI->getOpcode()))
4119 unsigned ReservedCycles = 0;
4120 auto *RI = Resources.
begin();
4121 auto *RE = Resources.
end();
4123 dbgs() <<
"Trying to reserve resource for " << NumCycles
4124 <<
" cycles for \n";
4127 for (
unsigned C = 0;
C < NumCycles; ++
C)
4129 if ((*RI)->canReserveResources(*
MI)) {
4130 (*RI)->reserveResources(*
MI);
4137 <<
", NumCycles:" << NumCycles <<
"\n");
4139 for (
unsigned C = ReservedCycles;
C < NumCycles; ++
C) {
4141 <<
"NewResource created to reserve resources"
4144 assert(NewResource->canReserveResources(*
MI) &&
"Reserve error.");
4145 NewResource->reserveResources(*
MI);
4146 Resources.
push_back(std::unique_ptr<DFAPacketizer>(NewResource));
4150 int Resmii = Resources.
size();
4157 return calculateResMIIDFA();
4164 for (
SUnit &SU : DAG->SUnits) {
4176 <<
" WriteProcRes: ";
4181 make_range(STI->getWriteProcResBegin(SCDesc),
4182 STI->getWriteProcResEnd(SCDesc))) {
4186 SM.getProcResource(PRE.ProcResourceIdx);
4187 dbgs() <<
Desc->Name <<
": " << PRE.ReleaseAtCycle <<
", ";
4190 ResourceCount[PRE.ProcResourceIdx] += PRE.ReleaseAtCycle;
4195 int Result = (NumMops + IssueWidth - 1) / IssueWidth;
4198 dbgs() <<
"#Mops: " << NumMops <<
", "
4199 <<
"IssueWidth: " << IssueWidth <<
", "
4200 <<
"Cycles: " << Result <<
"\n";
4205 std::stringstream SS;
4206 SS << std::setw(2) <<
"ID" << std::setw(16) <<
"Name" << std::setw(10)
4207 <<
"Units" << std::setw(10) <<
"Consumed" << std::setw(10) <<
"Cycles"
4212 for (
unsigned I = 1, E = SM.getNumProcResourceKinds();
I < E; ++
I) {
4214 int Cycles = (ResourceCount[
I] +
Desc->NumUnits - 1) /
Desc->NumUnits;
4217 std::stringstream SS;
4218 SS << std::setw(2) <<
I << std::setw(16) <<
Desc->Name << std::setw(10)
4219 <<
Desc->NumUnits << std::setw(10) << ResourceCount[
I]
4220 << std::setw(10) << Cycles <<
"\n";
4224 if (Cycles > Result)
4231 InitiationInterval =
II;
4232 DFAResources.clear();
4233 DFAResources.resize(
II);
4234 for (
auto &
I : DFAResources)
4235 I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
4238 NumScheduledMops.clear();
4239 NumScheduledMops.resize(
II);
4243 if (Pred.isArtificial() || Dst->isBoundaryNode())
4248 return IgnoreAnti && (Pred.getKind() ==
SDep::Kind::Anti || Distance != 0);
4251SwingSchedulerDDG::SwingSchedulerDDGEdges &
4252SwingSchedulerDDG::getEdges(
const SUnit *SU) {
4254 return EntrySUEdges;
4260const SwingSchedulerDDG::SwingSchedulerDDGEdges &
4261SwingSchedulerDDG::getEdges(
const SUnit *SU)
const {
4263 return EntrySUEdges;
4269void SwingSchedulerDDG::addEdge(
const SUnit *SU,
4270 const SwingSchedulerDDGEdge &
Edge) {
4272 "Validation-only edges are not expected here.");
4273 auto &Edges = getEdges(SU);
4274 if (
Edge.getSrc() == SU)
4275 Edges.Succs.push_back(
Edge);
4277 Edges.Preds.push_back(
Edge);
4280void SwingSchedulerDDG::initEdges(SUnit *SU) {
4281 for (
const auto &PI : SU->
Preds) {
4282 SwingSchedulerDDGEdge
Edge(SU, PI,
false,
4287 for (
const auto &SI : SU->
Succs) {
4288 SwingSchedulerDDGEdge
Edge(SU, SI,
true,
4296 : EntrySU(EntrySU), ExitSU(ExitSU) {
4297 EdgesVec.resize(SUnits.size());
4302 for (
auto &SU : SUnits)
4306 for (
SUnit &SU : SUnits) {
4311 for (
SUnit *Dst : *OD) {
4314 Edge.setDistance(1);
4315 ValidationOnlyEdges.push_back(Edge);
4321const SwingSchedulerDDG::EdgesType &
4323 return getEdges(SU).Preds;
4326const SwingSchedulerDDG::EdgesType &
4328 return getEdges(SU).Succs;
4335 auto ExpandCycle = [&](
SUnit *SU) {
4342 SUnit *Src = Edge.getSrc();
4343 SUnit *Dst = Edge.getDst();
4344 if (!Src->isInstr() || !Dst->isInstr())
4346 int CycleSrc = ExpandCycle(Src);
4347 int CycleDst = ExpandCycle(Dst);
4348 int MaxLateStart = CycleDst + Edge.getDistance() *
II - Edge.getLatency();
4349 if (CycleSrc > MaxLateStart) {
4351 dbgs() <<
"Validation failed for edge from " << Src->NodeNum <<
" to "
4352 << Dst->NodeNum <<
"\n";
4362 for (
SUnit &SU : SUnits) {
4391 !
TII->isGlobalMemoryObject(FromMI) &&
4409 const auto DumpSU = [](
const SUnit *SU) {
4410 std::ostringstream OSS;
4411 OSS <<
"SU(" << SU->
NodeNum <<
")";
4415 dbgs() <<
" Loop carried edges from " << DumpSU(SU) <<
"\n"
4417 for (
SUnit *Dst : *Order)
4418 dbgs() <<
" " << DumpSU(Dst) <<
"\n";
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::optional< unsigned > getTag(const TargetRegisterInfo *TRI, const MachineInstr &MI, const LoadInfo &LI)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file defines the DenseMap class.
const HexagonInstrInfo * TII
A common definition of LaneBitmask for use in TableGen and CodeGen.
static void addEdge(SmallVectorImpl< LazyCallGraph::Edge > &Edges, DenseMap< LazyCallGraph::Node *, int > &EdgeIndexMap, LazyCallGraph::Node &N, LazyCallGraph::Edge::Kind EK)
print mir2vec MIR2Vec Vocabulary Printer Pass
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
static bool hasPHICycleDFS(unsigned Reg, const DenseMap< unsigned, SmallVector< unsigned, 2 > > &PhiDeps, SmallSet< unsigned, 8 > &Visited, SmallSet< unsigned, 8 > &RecStack)
Depth-first search to detect cycles among PHI dependencies.
static cl::opt< bool > MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false), cl::desc("Use the MVE code generator for software pipelining"))
static cl::opt< int > RegPressureMargin("pipeliner-register-pressure-margin", cl::Hidden, cl::init(5), cl::desc("Margin representing the unused percentage of " "the register pressure limit"))
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
static void computeScheduledInsts(const SwingSchedulerDAG *SSD, SMSchedule &Schedule, std::vector< MachineInstr * > &OrderedInsts, DenseMap< MachineInstr *, unsigned > &Stages)
Create an instruction stream that represents a single iteration and stage of each instruction.
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
static bool findLoopIncrementValue(const MachineOperand &Op, int &Value)
When Op is a value that is incremented recursively in a loop and there is a unique instruction that i...
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
static cl::opt< unsigned > SwpMaxNumStores("pipeliner-max-num-stores", cl::desc("Maximum number of stores allwed in the target loop."), cl::Hidden, cl::init(200))
A command line argument to limit the number of store instructions in the target basic block.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static bool hasPHICycle(const MachineBasicBlock *LoopHeader, const MachineRegisterInfo &MRI)
static cl::opt< WindowSchedulingFlag > WindowSchedulingOption("window-sched", cl::Hidden, cl::init(WindowSchedulingFlag::WS_On), cl::desc("Set how to use window scheduling algorithm."), cl::values(clEnumValN(WindowSchedulingFlag::WS_Off, "off", "Turn off window algorithm."), clEnumValN(WindowSchedulingFlag::WS_On, "on", "Use window algorithm after SMS algorithm fails."), clEnumValN(WindowSchedulingFlag::WS_Force, "force", "Use window algorithm instead of SMS algorithm.")))
A command line argument to set the window scheduling option.
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst, BatchAAResults &BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, const SwingSchedulerDAG *SSD, bool PerformCheapCheck)
Returns true if there is a loop-carried order dependency from Src to Dst.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
static cl::opt< int > SwpIISearchRange("pipeliner-ii-search-range", cl::desc("Range to search for II"), cl::Hidden, cl::init(10))
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited, SwingSchedulerDDG *DDG)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, SwingSchedulerDDG *DDG, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static cl::opt< bool > LimitRegPressure("pipeliner-register-pressure", cl::Hidden, cl::init(false), cl::desc("Limit register pressure of scheduled loop"))
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
static Register findUniqueOperandDefinedInLoop(const MachineInstr &MI)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file provides utility analysis objects describing memory locations.
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PriorityQueue class.
Remove Loads Into Fake Uses
std::pair< BasicBlock *, BasicBlock * > Edge
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Target-Independent Code Generator Pass Configuration Options pass.
Add loop-carried chain dependencies.
void computeDependencies()
The main function to compute loop-carried order-dependencies.
const BitVector & getLoopCarried(unsigned Idx) const
LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, BatchAAResults *BAA, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool erase(const KeyT &Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
AttributeList getAttributes() const
Return the attribute list for this Function.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
bool isPostIncrement(const MachineInstr &MI) const override
Return true for post-incremented instructions.
DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override
Create machine specific model for scheduling.
bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const override
For instructions with a base and offset, return the position of the base register and offset operands...
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
bool isEmpty() const
Returns true if there are no itineraries.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
TypeSize getValue() const
Represents a single loop in the control flow graph.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MDOperand & getOperand(unsigned I) const
ArrayRef< MDOperand > operands() const
unsigned getNumOperands() const
Return number of MDNode operands.
LLVM_ABI StringRef getString() const
MachineInstrBundleIterator< const MachineInstr > const_iterator
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool isRegSequence() const
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI bool isIdenticalTo(const MachineInstr &Other, MICheckType Check=CheckDefs) const
Return true if this instruction is identical to Other.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
LLVM_ABI void dump() const
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
const Value * getValue() const
Return the base address of the memory access.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register,...
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
Expand the kernel using modulo variable expansion algorithm (MVE).
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void annotate()
Performs the annotation.
Represents a schedule for a single-block loop.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
void print(raw_ostream &os) const
void setRecMII(unsigned mii)
unsigned count(SUnit *SU) const
void setColocate(unsigned c)
int compareRecMII(NodeSet &RHS)
LLVM_DUMP_METHOD void dump() const
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A reimplementation of ModuloScheduleExpander.
PointerIntPair - This class implements a pair of a pointer and small integer.
Track the current register pressure at some position in the instruction stream, and remember the high...
LLVM_ABI void addLiveRegs(ArrayRef< VRegMaskOrUnit > Regs)
Force liveness of virtual registers or physical register units.
unsigned getRegPressureSetLimit(unsigned Idx) const
Get the register unit limit for the given pressure set index.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
int calculateResMII() const
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
void init(int II)
Initialize resources with the initiation interval II.
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Kind
These are the different kinds of scheduling dependencies.
@ Order
Any other ordering dependency.
@ Anti
A register anti-dependence (aka WAR).
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Barrier
An unknown scheduling barrier.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This class represents the scheduled code.
std::deque< SUnit * > reorderInstructions(const SwingSchedulerDAG *SSD, const std::deque< SUnit * > &Instrs) const
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
void dump() const
Utility function used for debugging to print the schedule.
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the earliest scheduled instruction in the dependence chain.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU, const SwingSchedulerDDG *DDG) const
Return true if all scheduled predecessors are loop-carried output/order dependencies.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts) const
Order the instructions within a cycle so that the definitions occur before the uses.
bool isValidSchedule(SwingSchedulerDAG *SSD)
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO) const
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SmallPtrSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const
Return true if the scheduled Phi has a loop carried operand.
int latestCycleInChain(const SwingSchedulerDDGEdge &Dep, const SwingSchedulerDDG *DDG)
Return the cycle of the latest scheduled instruction in the dependence chain.
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Scheduling unit. This is a node in the scheduling DAG.
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
unsigned NodeNum
Entry # of node in the node vector.
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
LLVM_ABI void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
unsigned short Latency
Node latency.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
bool hasPhysRegDefs
Has physreg defs that are being used.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
LLVM_ABI bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock * BB
The block in which to insert instructions.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void dump() const override
LLVM_ABI void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
LLVM_ABI bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
MachineRegisterInfo & MRI
Virtual/real register map.
const TargetInstrInfo * TII
Target instruction information.
std::vector< SUnit > SUnits
The scheduling units.
const TargetRegisterInfo * TRI
Target processor register info.
SUnit EntrySU
Special node for the region entry.
MachineFunction & MF
Machine function.
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
typename vector_type::const_iterator iterator
void clear()
Completely clear the SetVector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual bool enableMachinePipeliner() const
True if the subtarget should run MachinePipeliner.
virtual bool useDFAforSMS() const
Default to DFA for resource management, return false when target will use ProcResource in InstrSchedM...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Wrapper class representing a virtual register or register unit.
constexpr bool isVirtualReg() const
constexpr MCRegUnit asMCRegUnit() const
constexpr Register asVirtualReg() const
The main class in the implementation of the target independent window scheduler.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
@ Valid
The data is already valid.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< DefNode * > Def
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
std::set< NodeId > NodeSet
friend class Instruction
Iterator for Instructions in a `BasicBlock.
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
void stable_sort(R &&Range)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
@ WS_Force
Use window algorithm after SMS algorithm fails.
@ WS_On
Turn off window algorithm.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
cl::opt< bool > SwpEnableCopyToPhi
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
LLVM_ABI char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
@ Increment
Incrementally increasing token ID.
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, const LoopInfo *LI=nullptr, unsigned MaxLookup=MaxLookupSearchDepth)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
LLVM_ABI bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This class holds an SUnit corresponding to a memory operation and other information related to the in...
const Value * MemOpValue
The value of a memory operand.
SmallVector< const Value *, 2 > UnderlyingObjs
bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const
int64_t MemOpOffset
The offset of a memory operand.
bool IsAllIdentified
True if all the underlying objects are identified.
SUnitWithMemInfo(SUnit *SU)
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
uint64_t FuncUnits
Bitmask representing a set of functional units.
static constexpr LaneBitmask getNone()
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
Define a kind of processor resource that will be modeled by the scheduler.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.