Go to the documentation of this file.
25 #include "llvm/Config/llvm-config.h"
39 #define DEBUG_TYPE "pre-RA-sched"
41 STATISTIC(NumNewPredsAdded,
"Number of times a single predecessor was added");
43 "Number of times the topological order has been recomputed");
48 cl::desc(
"Stress test instruction scheduling"));
51 void SchedulingPriorityQueue::anchor() {}
54 :
TM(mf.getTarget()),
TII(mf.getSubtarget().getInstrInfo()),
55 TRI(mf.getSubtarget().getRegisterInfo()), MF(mf),
56 MRI(mf.getRegInfo()) {
71 if (!Node || !Node->isMachineOpcode())
return nullptr;
72 return &
TII->
get(Node->getMachineOpcode());
95 switch(Contents.OrdKind) {
100 case Weak:
dbgs() <<
" Weak";
break;
112 if (!
Required && PredDep.getSUnit() ==
D.getSUnit())
114 if (PredDep.overlaps(
D)) {
117 if (PredDep.getLatency() <
D.getLatency()) {
118 SUnit *PredSU = PredDep.getSUnit();
120 SDep ForwardD = PredDep;
123 if (SuccDep == ForwardD) {
128 PredDep.setLatency(
D.getLatency());
140 "NumPreds will overflow!");
142 "NumSuccs will overflow!");
146 if (!
N->isScheduled) {
152 "NumPredsLeft will overflow!");
162 "NumSuccsLeft will overflow!");
167 N->Succs.push_back(
P);
168 if (
P.getLatency() != 0) {
185 assert(Succ !=
N->Succs.end() &&
"Mismatching preds / succs lists!");
186 N->Succs.erase(Succ);
191 assert(
N->NumSuccs > 0 &&
"NumSuccs will underflow!");
195 if (!
N->isScheduled) {
207 assert(
N->NumSuccsLeft > 0 &&
"NumSuccsLeft will underflow!");
211 if (
P.getLatency() != 0) {
218 if (!isDepthCurrent)
return;
220 WorkList.push_back(
this);
223 SU->isDepthCurrent =
false;
226 if (SuccSU->isDepthCurrent)
227 WorkList.push_back(SuccSU);
229 }
while (!WorkList.empty());
233 if (!isHeightCurrent)
return;
235 WorkList.push_back(
this);
238 SU->isHeightCurrent =
false;
241 if (PredSU->isHeightCurrent)
242 WorkList.push_back(PredSU);
244 }
while (!WorkList.empty());
252 isDepthCurrent =
true;
260 isHeightCurrent =
true;
264 void SUnit::ComputeDepth() {
266 WorkList.push_back(
this);
268 SUnit *Cur = WorkList.back();
271 unsigned MaxPredDepth = 0;
274 if (PredSU->isDepthCurrent)
275 MaxPredDepth =
std::max(MaxPredDepth,
279 WorkList.push_back(PredSU);
285 if (MaxPredDepth != Cur->Depth) {
287 Cur->Depth = MaxPredDepth;
289 Cur->isDepthCurrent =
true;
291 }
while (!WorkList.empty());
295 void SUnit::ComputeHeight() {
297 WorkList.push_back(
this);
299 SUnit *Cur = WorkList.back();
302 unsigned MaxSuccHeight = 0;
305 if (SuccSU->isHeightCurrent)
306 MaxSuccHeight =
std::max(MaxSuccHeight,
310 WorkList.push_back(SuccSU);
316 if (MaxSuccHeight != Cur->Height) {
318 Cur->Height = MaxSuccHeight;
320 Cur->isHeightCurrent =
true;
322 }
while (!WorkList.empty());
330 unsigned MaxDepth = BestI->getSUnit()->getDepth();
336 if (BestI !=
Preds.begin())
340 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
366 if (SU.
Preds.size() > 0) {
367 dbgs() <<
" Predecessors:\n";
376 if (SU.
Succs.size() > 0) {
377 dbgs() <<
" Successors:\n";
391 bool AnyNotSched =
false;
392 unsigned DeadNodes = 0;
400 dbgs() <<
"*** Scheduling failed! ***\n";
402 dbgs() <<
"has not been scheduled!\n";
409 dbgs() <<
"*** Scheduling failed! ***\n";
411 dbgs() <<
"has an unexpected "
412 << (isBottomUp ?
"Height" :
"Depth") <<
" value!\n";
418 dbgs() <<
"*** Scheduling failed! ***\n";
420 dbgs() <<
"has successors left!\n";
426 dbgs() <<
"*** Scheduling failed! ***\n";
428 dbgs() <<
"has predecessors left!\n";
434 return SUnits.size() - DeadNodes;
470 unsigned DAGSize = SUnits.size();
471 std::vector<SUnit*> WorkList;
474 Index2Node.resize(DAGSize);
475 Node2Index.resize(DAGSize);
479 WorkList.push_back(ExitSU);
480 for (
SUnit &SU : SUnits) {
481 int NodeNum = SU.NodeNum;
482 unsigned Degree = SU.Succs.size();
484 Node2Index[NodeNum] = Degree;
488 assert(SU.Succs.empty() &&
"SUnit should have no successors");
490 WorkList.push_back(&SU);
495 while (!WorkList.empty()) {
496 SUnit *SU = WorkList.back();
505 WorkList.push_back(SU);
514 for (
SUnit &SU : SUnits) {
515 for (
const SDep &
PD : SU.Preds) {
516 assert(Node2Index[SU.NodeNum] > Node2Index[
PD.getSUnit()->NodeNum] &&
517 "Wrong topological sorting");
523 void ScheduleDAGTopologicalSort::FixOrder() {
531 for (
auto &U : Updates)
540 Dirty = Dirty || Updates.size() > 10;
545 Updates.emplace_back(
Y,
X);
549 int UpperBound, LowerBound;
550 LowerBound = Node2Index[
Y->NodeNum];
551 UpperBound = Node2Index[
X->NodeNum];
552 bool HasLoop =
false;
554 if (LowerBound < UpperBound) {
557 DFS(
Y, UpperBound, HasLoop);
558 assert(!HasLoop &&
"Inserted edge creates a loop!");
560 Shift(Visited, LowerBound, UpperBound);
570 void ScheduleDAGTopologicalSort::DFS(
const SUnit *SU,
int UpperBound,
572 std::vector<const SUnit*> WorkList;
573 WorkList.
reserve(SUnits.size());
575 WorkList.push_back(SU);
577 SU = WorkList.back();
583 if (
s >= Node2Index.size())
585 if (Node2Index[
s] == UpperBound) {
590 if (!Visited.
test(
s) && Node2Index[
s] < UpperBound) {
591 WorkList.push_back(SuccDep.
getSUnit());
594 }
while (!WorkList.empty());
598 const SUnit &TargetSU,
600 std::vector<const SUnit*> WorkList;
601 int LowerBound = Node2Index[StartSU.
NodeNum];
602 int UpperBound = Node2Index[TargetSU.
NodeNum];
605 std::vector<int> Nodes;
607 if (LowerBound > UpperBound) {
612 WorkList.reserve(SUnits.size());
619 const SUnit *SU = WorkList.back();
622 const SUnit *Succ = SD.getSUnit();
627 if (Node2Index[
s] == UpperBound) {
632 if (!Visited.
test(
s) && Node2Index[
s] < UpperBound) {
637 }
while (!WorkList.empty());
645 VisitedBack.
resize(SUnits.size());
651 WorkList.push_back(&TargetSU);
653 const SUnit *SU = WorkList.back();
656 const SUnit *Pred = SD.getSUnit();
661 if (Node2Index[
s] == LowerBound) {
665 if (!VisitedBack.
test(
s) && Visited.
test(
s)) {
671 }
while (!WorkList.empty());
673 assert(Found &&
"Error in SUnit Graph!");
678 void ScheduleDAGTopologicalSort::Shift(
BitVector& Visited,
int LowerBound,
684 for (
i = LowerBound;
i <= UpperBound; ++
i) {
686 int w = Index2Node[
i];
687 if (Visited.
test(w)) {
697 for (
unsigned LI : L) {
708 for (
const SDep &PredDep : TargetSU->
Preds)
716 assert(SU->
NodeNum == Index2Node.size() &&
"Node cannot be added at the end");
717 assert(SU->
NumPreds == 0 &&
"Can only add SU's with no predecessors");
718 Node2Index.push_back(Index2Node.size());
719 Index2Node.push_back(SU->
NodeNum);
720 Visited.
resize(Node2Index.size());
724 const SUnit *TargetSU) {
728 int UpperBound, LowerBound;
729 LowerBound = Node2Index[TargetSU->
NodeNum];
730 UpperBound = Node2Index[SU->
NodeNum];
731 bool HasLoop =
false;
733 if (LowerBound < UpperBound) {
736 DFS(TargetSU, UpperBound, HasLoop);
741 void ScheduleDAGTopologicalSort::Allocate(
int n,
int index) {
748 : SUnits(sunits), ExitSU(exitsu) {}
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N fr...
void dumpAttributes() const
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This is an optimization pass for GlobalISel generic memory operations.
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
@ MustAliasMem
Nonvolatile load/Store instructions that must alias.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
@ Anti
A register anti-dependence (aka WAR).
Represents one node in the SelectionDAG.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
SmallVector< SDep, 4 > Succs
All sunit successors.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
LLVM_NODISCARD T pop_back_val()
unsigned const TargetRegisterInfo * TRI
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
ScheduleDAG(MachineFunction &mf)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
@ Weak
Arbitrary weak DAG edge.
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
virtual ~ScheduleHazardRecognizer()
void setDepthDirty()
Sets a flag in this node to indicate that its stored Depth value will require recomputation the next ...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned short Latency
Node latency.
void setDepthToAtLeast(unsigned NewDepth)
If NewDepth is greater than this node's depth value, sets it to be the new depth value.
unsigned NodeNum
Entry # of node in the node vector.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const HexagonInstrInfo * TII
Describe properties that are true of each instruction in the target description file.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
STATISTIC(NumFunctions, "Total number of functions")
@ Output
A register output-dependence (aka WAW).
@ Data
Regular data dependence (aka true-dependence).
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned VerifyScheduledDAG(bool isBottomUp)
Verifies that all SUnits were scheduled and that their state is consistent.
@ Order
Any other ordering dependency.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
ScheduleDAGTopologicalSort(std::vector< SUnit > &SUnits, SUnit *ExitSU)
static cl::opt< bool > StressSchedOpt("stress-sched", cl::Hidden, cl::init(false), cl::desc("Stress test instruction scheduling"))
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned short NumRegDefsLeft
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
multiplies can be turned into SHL s
unsigned getReg() const
Returns the register associated with this edge.
bool isScheduled
True once scheduled.
initializer< Ty > init(const Ty &Val)
@ MayAliasMem
Nonvolatile load/Store instructions that may alias.
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
void dumpNodeAll(const SUnit &SU) const
virtual void dumpNode(const SUnit &SU) const =0
const TargetRegisterInfo * TRI
Target processor register info.
@ Barrier
An unknown scheduling barrier.
SUnit EntrySU
Special node for the region entry.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
unsigned const MachineRegisterInfo * MRI
static const unsigned MaxDepth
std::vector< SUnit > SUnits
The scheduling units.
bool test(unsigned Idx) const
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
void setLatency(unsigned Lat)
Sets the latency for this edge.
const TargetInstrInfo * TII
Target instruction information.
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
typename SuperClass::iterator iterator
void dumpNodeName(const SUnit &SU) const
Align max(MaybeAlign Lhs, Align Rhs)
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
@ Cluster
Weak DAG edge linking a chain of clustered instrs.
std::vector< int > GetSubGraph(const SUnit &StartSU, const SUnit &TargetSU, bool &Success)
Returns an array of SUs that are both in the successor subtree of StartSU and in the predecessor subt...
const char LLVMTargetMachineRef TM
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Scheduling unit. This is a node in the scheduling DAG.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
SmallVectorImpl< SDep >::iterator pred_iterator
void dump(const TargetRegisterInfo *TRI=nullptr) const
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void reserve(size_type N)
void clearDAG()
Clears the DAG state (between regions).
SUnit ExitSU
Special node for the region exit.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.