Go to the documentation of this file.
17 #define DEBUG_TYPE "machine-scheduler"
21 class GCNILPScheduler {
33 unsigned CurQueueId = 0;
35 std::vector<unsigned> SUNumbers;
38 unsigned CurCycle = 0;
40 unsigned getNodePriority(
const SUnit *SU)
const;
43 Candidate* pickCandidate();
45 void releasePending();
46 void advanceToCycle(
unsigned NextCycle);
47 void releasePredecessors(
const SUnit* SU);
59 unsigned &SethiUllmanNumber = SUNumbers[SU->
NodeNum];
60 if (SethiUllmanNumber != 0)
61 return SethiUllmanNumber;
65 if (Pred.
isCtrl())
continue;
68 if (PredSethiUllman > SethiUllmanNumber) {
69 SethiUllmanNumber = PredSethiUllman;
72 else if (PredSethiUllman == SethiUllmanNumber)
76 SethiUllmanNumber += Extra;
78 if (SethiUllmanNumber == 0)
79 SethiUllmanNumber = 1;
81 return SethiUllmanNumber;
86 unsigned GCNILPScheduler::getNodePriority(
const SUnit *SU)
const {
107 unsigned MaxHeight = 0;
109 if (Succ.
isCtrl())
continue;
113 if (Height > MaxHeight)
122 unsigned Scratches = 0;
124 if (Pred.
isCtrl())
continue;
136 int RHeight = (
int)
right->getHeight();
145 if (LHeight != RHeight)
146 return LHeight > RHeight ? 1 : -1;
149 int RDepth =
right->getDepth();
150 if (LDepth != RDepth) {
152 <<
") depth " << LDepth <<
" vs SU (" <<
right->NodeNum
153 <<
") depth " << RDepth <<
"\n");
154 return LDepth < RDepth ? 1 : -1;
173 <<
"): " <<
right->getDepth() <<
"\n");
186 unsigned LPriority = getNodePriority(left);
187 unsigned RPriority = getNodePriority(
right);
189 if (LPriority != RPriority)
190 return LPriority > RPriority ?
right : left;
212 return LDist < RDist ?
right : left;
217 if (LScratch != RScratch)
218 return LScratch > RScratch ?
right : left;
236 "NodeQueueId cannot be zero");
240 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
241 if (AvailQueue.empty())
243 auto Best = AvailQueue.begin();
244 for (
auto I = std::next(AvailQueue.begin()),
E = AvailQueue.end();
I !=
E; ++
I) {
245 auto NewBestSU = pickBest(Best->SU,
I->SU);
246 if (NewBestSU != Best->SU) {
254 void GCNILPScheduler::releasePending() {
257 for(
auto I = PendingQueue.begin(),
E = PendingQueue.end();
I !=
E;) {
259 if (
C.SU->getHeight() <= CurCycle) {
260 PendingQueue.remove(
C);
261 AvailQueue.push_back(
C);
262 C.SU->NodeQueueId = CurQueueId++;
268 void GCNILPScheduler::advanceToCycle(
unsigned NextCycle) {
269 if (NextCycle <= CurCycle)
271 CurCycle = NextCycle;
275 void GCNILPScheduler::releasePredecessors(
const SUnit* SU) {
276 for (
const auto &PredEdge : SU->
Preds) {
277 auto PredSU = PredEdge.getSUnit();
278 if (PredEdge.isWeak())
280 assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
282 PredSU->setHeightToAtLeast(SU->
getHeight() + PredEdge.getLatency());
284 if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
285 PendingQueue.push_front(*
new (Alloc.
Allocate()) Candidate(PredSU));
289 std::vector<const SUnit*>
292 auto &SUnits =
const_cast<ScheduleDAG&
>(DAG).SUnits;
294 std::vector<SUnit> SUSavedCopy;
295 SUSavedCopy.resize(SUnits.size());
299 for (
const SUnit &SU : SUnits)
302 SUNumbers.assign(SUnits.size(), 0);
303 for (
const SUnit &SU : SUnits)
306 for (
auto SU : BotRoots) {
307 AvailQueue.push_back(
310 releasePredecessors(&DAG.
ExitSU);
312 std::vector<const SUnit*> Schedule;
313 Schedule.reserve(SUnits.size());
315 if (AvailQueue.empty() && !PendingQueue.empty()) {
316 auto EarliestSU = std::min_element(
317 PendingQueue.begin(), PendingQueue.end(),
318 [=](
const Candidate&
C1,
const Candidate& C2) {
319 return C1.SU->getHeight() < C2.SU->getHeight();
321 advanceToCycle(
std::max(CurCycle + 1, EarliestSU->getHeight()));
323 if (AvailQueue.empty())
330 <<
' ' <<
C.SU->NodeNum;
333 auto C = pickCandidate();
335 AvailQueue.remove(*
C);
341 releasePredecessors(SU);
342 Schedule.push_back(SU);
345 assert(SUnits.size() == Schedule.size());
350 for (
auto &SU : SUnits)
360 return S.schedule(BotRoots, DAG);
the custom lowered code happens to be right
This is an optimization pass for GlobalISel generic memory operations.
instcombine should handle this C2 when C1
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
SmallVector< SDep, 4 > Succs
All sunit successors.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
unsigned short Latency
Node latency.
(vector float) vec_cmpeq(*A, *B) C
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
unsigned NodeNum
Entry # of node in the node vector.
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
bool isScheduled
True once scheduled.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
virtual void dumpNode(const SUnit &SU) const =0
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
std::vector< const SUnit * > makeGCNILPScheduler(ArrayRef< const SUnit * > BotRoots, const ScheduleDAG &DAG)
LLVM_ATTRIBUTE_RETURNS_NONNULL void * Allocate(size_t Size, Align Alignment)
Allocate space at the specified alignment.
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
unsigned NodeQueueId
Queue id of node.
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
Align max(MaybeAlign Lhs, Align Rhs)
A simple intrusive list implementation.
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Scheduling unit. This is a node in the scheduling DAG.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
SUnit ExitSU
Special node for the region exit.
static int BUCompareLatency(const SUnit *left, const SUnit *right)