LLVM 17.0.0git
MachineScheduler.cpp
Go to the documentation of this file.
1//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// MachineScheduler schedules machine instructions after phi elimination. It
10// preserves LiveIntervals so it can be invoked before register allocation.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/ArrayRef.h"
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
50#include "llvm/Config/llvm-config.h"
52#include "llvm/MC/LaneBitmask.h"
53#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <string>
68#include <tuple>
69#include <utility>
70#include <vector>
71
72using namespace llvm;
73
74#define DEBUG_TYPE "machine-scheduler"
75
76STATISTIC(NumClustered, "Number of load/store pairs clustered");
77
78namespace llvm {
79
81 cl::desc("Force top-down list scheduling"));
83 cl::desc("Force bottom-up list scheduling"));
86 cl::desc("Print critical path length to stdout"));
87
89 "verify-misched", cl::Hidden,
90 cl::desc("Verify machine instrs before and after machine scheduling"));
91
92#ifndef NDEBUG
94 "view-misched-dags", cl::Hidden,
95 cl::desc("Pop up a window to show MISched dags after they are processed"));
96cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
97 cl::desc("Print schedule DAGs"));
99 "misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
100 cl::desc("Dump resource usage at schedule boundary."));
101#else
102const bool ViewMISchedDAGs = false;
103const bool PrintDAGs = false;
104#ifdef LLVM_ENABLE_DUMP
105const bool MISchedDumpReservedCycles = false;
106#endif // LLVM_ENABLE_DUMP
107#endif // NDEBUG
108
109} // end namespace llvm
110
111#ifndef NDEBUG
112/// In some situations a few uninteresting nodes depend on nearly all other
113/// nodes in the graph, provide a cutoff to hide them.
114static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
115 cl::desc("Hide nodes with more predecessor/successor than cutoff"));
116
118 cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
119
121 cl::desc("Only schedule this function"));
122static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
123 cl::desc("Only schedule this MBB#"));
124#endif // NDEBUG
125
126/// Avoid quadratic complexity in unusually large basic blocks by limiting the
127/// size of the ready lists.
129 cl::desc("Limit ready list to N instructions"), cl::init(256));
130
131static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
132 cl::desc("Enable register pressure scheduling."), cl::init(true));
133
134static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
135 cl::desc("Enable cyclic critical path analysis."), cl::init(true));
136
138 cl::desc("Enable memop clustering."),
139 cl::init(true));
140static cl::opt<bool>
141 ForceFastCluster("force-fast-cluster", cl::Hidden,
142 cl::desc("Switch to fast cluster algorithm with the lost "
143 "of some fusion opportunities"),
144 cl::init(false));
146 FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
147 cl::desc("The threshold for fast cluster"),
148 cl::init(1000));
149
150#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
152 "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
153 cl::desc("Dump resource usage at schedule boundary."));
155 HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
156 cl::desc("Set width of the columns with "
157 "the resources and schedule units"),
158 cl::init(19));
160 ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
161 cl::desc("Set width of the columns showing resource booking."),
162 cl::init(5));
163#endif
164
165// DAG subtrees must have at least this many nodes.
166static const unsigned MinSubtreeSize = 8;
167
168// Pin the vtables to this file.
169void MachineSchedStrategy::anchor() {}
170
171void ScheduleDAGMutation::anchor() {}
172
173//===----------------------------------------------------------------------===//
174// Machine Instruction Scheduling Pass and Registry
175//===----------------------------------------------------------------------===//
176
179}
180
182 delete RegClassInfo;
183}
184
185namespace {
186
187/// Base class for a machine scheduler class that can run at any point.
188class MachineSchedulerBase : public MachineSchedContext,
189 public MachineFunctionPass {
190public:
191 MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
192
193 void print(raw_ostream &O, const Module* = nullptr) const override;
194
195protected:
196 void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
197};
198
199/// MachineScheduler runs after coalescing and before register allocation.
200class MachineScheduler : public MachineSchedulerBase {
201public:
202 MachineScheduler();
203
204 void getAnalysisUsage(AnalysisUsage &AU) const override;
205
206 bool runOnMachineFunction(MachineFunction&) override;
207
208 static char ID; // Class identification, replacement for typeinfo
209
210protected:
211 ScheduleDAGInstrs *createMachineScheduler();
212};
213
214/// PostMachineScheduler runs after shortly before code emission.
215class PostMachineScheduler : public MachineSchedulerBase {
216public:
217 PostMachineScheduler();
218
219 void getAnalysisUsage(AnalysisUsage &AU) const override;
220
221 bool runOnMachineFunction(MachineFunction&) override;
222
223 static char ID; // Class identification, replacement for typeinfo
224
225protected:
226 ScheduleDAGInstrs *createPostMachineScheduler();
227};
228
229} // end anonymous namespace
230
231char MachineScheduler::ID = 0;
232
233char &llvm::MachineSchedulerID = MachineScheduler::ID;
234
236 "Machine Instruction Scheduler", false, false)
244
245MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
247}
248
249void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
250 AU.setPreservesCFG();
260}
261
262char PostMachineScheduler::ID = 0;
263
264char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
265
266INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
267 "PostRA Machine Instruction Scheduler", false, false)
271INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
272 "PostRA Machine Instruction Scheduler", false, false)
273
274PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
276}
277
278void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
279 AU.setPreservesCFG();
285}
286
289
290/// A dummy default scheduler factory indicates whether the scheduler
291/// is overridden on the command line.
293 return nullptr;
294}
295
296/// MachineSchedOpt allows command line selection of the scheduler.
301 cl::desc("Machine instruction scheduler to use"));
302
304DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
306
308 "enable-misched",
309 cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
310 cl::Hidden);
311
313 "enable-post-misched",
314 cl::desc("Enable the post-ra machine instruction scheduling pass."),
315 cl::init(true), cl::Hidden);
316
317/// Decrement this iterator until reaching the top or a non-debug instr.
321 assert(I != Beg && "reached the top of the region, cannot decrement");
322 while (--I != Beg) {
323 if (!I->isDebugOrPseudoInstr())
324 break;
325 }
326 return I;
327}
328
329/// Non-const version.
335}
336
337/// If this iterator is a debug value, increment until reaching the End or a
338/// non-debug instruction.
342 for(; I != End; ++I) {
343 if (!I->isDebugOrPseudoInstr())
344 break;
345 }
346 return I;
347}
348
349/// Non-const version.
355}
356
357/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
358ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
359 // Select the scheduler, or set the default.
361 if (Ctor != useDefaultMachineSched)
362 return Ctor(this);
363
364 // Get the default scheduler set by the target for this function.
365 ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
366 if (Scheduler)
367 return Scheduler;
368
369 // Default to GenericScheduler.
370 return createGenericSchedLive(this);
371}
372
373/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
374/// the caller. We don't have a command line option to override the postRA
375/// scheduler. The Target must configure it.
376ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
377 // Get the postRA scheduler set by the target for this function.
378 ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
379 if (Scheduler)
380 return Scheduler;
381
382 // Default to GenericScheduler.
383 return createGenericSchedPostRA(this);
384}
385
386/// Top-level MachineScheduler pass driver.
387///
388/// Visit blocks in function order. Divide each block into scheduling regions
389/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
390/// consistent with the DAG builder, which traverses the interior of the
391/// scheduling regions bottom-up.
392///
393/// This design avoids exposing scheduling boundaries to the DAG builder,
394/// simplifying the DAG builder's support for "special" target instructions.
395/// At the same time the design allows target schedulers to operate across
396/// scheduling boundaries, for example to bundle the boundary instructions
397/// without reordering them. This creates complexity, because the target
398/// scheduler must update the RegionBegin and RegionEnd positions cached by
399/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
400/// design would be to split blocks at scheduling boundaries, but LLVM has a
401/// general bias against block splitting purely for implementation simplicity.
402bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
403 if (skipFunction(mf.getFunction()))
404 return false;
405
408 return false;
409 } else if (!mf.getSubtarget().enableMachineScheduler())
410 return false;
411
412 LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
413
414 // Initialize the context of the pass.
415 MF = &mf;
416 MLI = &getAnalysis<MachineLoopInfo>();
417 MDT = &getAnalysis<MachineDominatorTree>();
418 PassConfig = &getAnalysis<TargetPassConfig>();
419 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
420
421 LIS = &getAnalysis<LiveIntervals>();
422
423 if (VerifyScheduling) {
424 LLVM_DEBUG(LIS->dump());
425 MF->verify(this, "Before machine scheduling.");
426 }
427 RegClassInfo->runOnMachineFunction(*MF);
428
429 // Instantiate the selected scheduler for this target, function, and
430 // optimization level.
431 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
432 scheduleRegions(*Scheduler, false);
433
434 LLVM_DEBUG(LIS->dump());
436 MF->verify(this, "After machine scheduling.");
437 return true;
438}
439
440bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
441 if (skipFunction(mf.getFunction()))
442 return false;
443
446 return false;
447 } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
448 LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
449 return false;
450 }
451 LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
452
453 // Initialize the context of the pass.
454 MF = &mf;
455 MLI = &getAnalysis<MachineLoopInfo>();
456 PassConfig = &getAnalysis<TargetPassConfig>();
457 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
458
460 MF->verify(this, "Before post machine scheduling.");
461
462 // Instantiate the selected scheduler for this target, function, and
463 // optimization level.
464 std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
465 scheduleRegions(*Scheduler, true);
466
468 MF->verify(this, "After post machine scheduling.");
469 return true;
470}
471
472/// Return true of the given instruction should not be included in a scheduling
473/// region.
474///
475/// MachineScheduler does not currently support scheduling across calls. To
476/// handle calls, the DAG builder needs to be modified to create register
477/// anti/output dependencies on the registers clobbered by the call's regmask
478/// operand. In PreRA scheduling, the stack pointer adjustment already prevents
479/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
480/// the boundary, but there would be no benefit to postRA scheduling across
481/// calls this late anyway.
484 MachineFunction *MF,
485 const TargetInstrInfo *TII) {
486 return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
487}
488
489/// A region of an MBB for scheduling.
490namespace {
491struct SchedRegion {
492 /// RegionBegin is the first instruction in the scheduling region, and
493 /// RegionEnd is either MBB->end() or the scheduling boundary after the
494 /// last instruction in the scheduling region. These iterators cannot refer
495 /// to instructions outside of the identified scheduling region because
496 /// those may be reordered before scheduling this region.
497 MachineBasicBlock::iterator RegionBegin;
499 unsigned NumRegionInstrs;
500
502 unsigned N) :
503 RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
504};
505} // end anonymous namespace
506
508
509static void
511 MBBRegionsVector &Regions,
512 bool RegionsTopDown) {
515
517 for(MachineBasicBlock::iterator RegionEnd = MBB->end();
518 RegionEnd != MBB->begin(); RegionEnd = I) {
519
520 // Avoid decrementing RegionEnd for blocks with no terminator.
521 if (RegionEnd != MBB->end() ||
522 isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
523 --RegionEnd;
524 }
525
526 // The next region starts above the previous region. Look backward in the
527 // instruction stream until we find the nearest boundary.
528 unsigned NumRegionInstrs = 0;
529 I = RegionEnd;
530 for (;I != MBB->begin(); --I) {
531 MachineInstr &MI = *std::prev(I);
532 if (isSchedBoundary(&MI, &*MBB, MF, TII))
533 break;
534 if (!MI.isDebugOrPseudoInstr()) {
535 // MBB::size() uses instr_iterator to count. Here we need a bundle to
536 // count as a single instruction.
537 ++NumRegionInstrs;
538 }
539 }
540
541 // It's possible we found a scheduling region that only has debug
542 // instructions. Don't bother scheduling these.
543 if (NumRegionInstrs != 0)
544 Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
545 }
546
547 if (RegionsTopDown)
548 std::reverse(Regions.begin(), Regions.end());
549}
550
551/// Main driver for both MachineScheduler and PostMachineScheduler.
552void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
553 bool FixKillFlags) {
554 // Visit all machine basic blocks.
555 //
556 // TODO: Visit blocks in global postorder or postorder within the bottom-up
557 // loop tree. Then we can optionally compute global RegPressure.
558 for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
559 MBB != MBBEnd; ++MBB) {
560
561 Scheduler.startBlock(&*MBB);
562
563#ifndef NDEBUG
564 if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
565 continue;
566 if (SchedOnlyBlock.getNumOccurrences()
567 && (int)SchedOnlyBlock != MBB->getNumber())
568 continue;
569#endif
570
571 // Break the block into scheduling regions [I, RegionEnd). RegionEnd
572 // points to the scheduling boundary at the bottom of the region. The DAG
573 // does not include RegionEnd, but the region does (i.e. the next
574 // RegionEnd is above the previous RegionBegin). If the current block has
575 // no terminator then RegionEnd == MBB->end() for the bottom region.
576 //
577 // All the regions of MBB are first found and stored in MBBRegions, which
578 // will be processed (MBB) top-down if initialized with true.
579 //
580 // The Scheduler may insert instructions during either schedule() or
581 // exitRegion(), even for empty regions. So the local iterators 'I' and
582 // 'RegionEnd' are invalid across these calls. Instructions must not be
583 // added to other regions than the current one without updating MBBRegions.
584
585 MBBRegionsVector MBBRegions;
586 getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
587 for (const SchedRegion &R : MBBRegions) {
588 MachineBasicBlock::iterator I = R.RegionBegin;
589 MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
590 unsigned NumRegionInstrs = R.NumRegionInstrs;
591
592 // Notify the scheduler of the region, even if we may skip scheduling
593 // it. Perhaps it still needs to be bundled.
594 Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
595
596 // Skip empty scheduling regions (0 or 1 schedulable instructions).
597 if (I == RegionEnd || I == std::prev(RegionEnd)) {
598 // Close the current region. Bundle the terminator if needed.
599 // This invalidates 'RegionEnd' and 'I'.
600 Scheduler.exitRegion();
601 continue;
602 }
603 LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
604 LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
605 << " " << MBB->getName() << "\n From: " << *I
606 << " To: ";
607 if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
608 else dbgs() << "End\n";
609 dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
611 errs() << MF->getName();
612 errs() << ":%bb. " << MBB->getNumber();
613 errs() << " " << MBB->getName() << " \n";
614 }
615
616 // Schedule a region: possibly reorder instructions.
617 // This invalidates the original region iterators.
618 Scheduler.schedule();
619
620 // Close the current region.
621 Scheduler.exitRegion();
622 }
623 Scheduler.finishBlock();
624 // FIXME: Ideally, no further passes should rely on kill flags. However,
625 // thumb2 size reduction is currently an exception, so the PostMIScheduler
626 // needs to do this.
627 if (FixKillFlags)
628 Scheduler.fixupKills(*MBB);
629 }
630 Scheduler.finalizeSchedule();
631}
632
633void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
634 // unimplemented
635}
636
637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
639 dbgs() << "Queue " << Name << ": ";
640 for (const SUnit *SU : Queue)
641 dbgs() << SU->NodeNum << " ";
642 dbgs() << "\n";
643}
644#endif
645
646//===----------------------------------------------------------------------===//
647// ScheduleDAGMI - Basic machine instruction scheduling. This is
648// independent of PreRA/PostRA scheduling and involves no extra book-keeping for
649// virtual registers.
650// ===----------------------------------------------------------------------===/
651
652// Provide a vtable anchor.
654
655/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
656/// NumPredsLeft reaches zero, release the successor node.
657///
658/// FIXME: Adjust SuccSU height based on MinLatency.
660 SUnit *SuccSU = SuccEdge->getSUnit();
661
662 if (SuccEdge->isWeak()) {
663 --SuccSU->WeakPredsLeft;
664 if (SuccEdge->isCluster())
665 NextClusterSucc = SuccSU;
666 return;
667 }
668#ifndef NDEBUG
669 if (SuccSU->NumPredsLeft == 0) {
670 dbgs() << "*** Scheduling failed! ***\n";
671 dumpNode(*SuccSU);
672 dbgs() << " has been released too many times!\n";
673 llvm_unreachable(nullptr);
674 }
675#endif
676 // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
677 // CurrCycle may have advanced since then.
678 if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
679 SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
680
681 --SuccSU->NumPredsLeft;
682 if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
683 SchedImpl->releaseTopNode(SuccSU);
684}
685
686/// releaseSuccessors - Call releaseSucc on each of SU's successors.
688 for (SDep &Succ : SU->Succs)
689 releaseSucc(SU, &Succ);
690}
691
692/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
693/// NumSuccsLeft reaches zero, release the predecessor node.
694///
695/// FIXME: Adjust PredSU height based on MinLatency.
697 SUnit *PredSU = PredEdge->getSUnit();
698
699 if (PredEdge->isWeak()) {
700 --PredSU->WeakSuccsLeft;
701 if (PredEdge->isCluster())
702 NextClusterPred = PredSU;
703 return;
704 }
705#ifndef NDEBUG
706 if (PredSU->NumSuccsLeft == 0) {
707 dbgs() << "*** Scheduling failed! ***\n";
708 dumpNode(*PredSU);
709 dbgs() << " has been released too many times!\n";
710 llvm_unreachable(nullptr);
711 }
712#endif
713 // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
714 // CurrCycle may have advanced since then.
715 if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
716 PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
717
718 --PredSU->NumSuccsLeft;
719 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
720 SchedImpl->releaseBottomNode(PredSU);
721}
722
723/// releasePredecessors - Call releasePred on each of SU's predecessors.
725 for (SDep &Pred : SU->Preds)
726 releasePred(SU, &Pred);
727}
728
731 SchedImpl->enterMBB(bb);
732}
733
735 SchedImpl->leaveMBB();
737}
738
739/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
740/// crossing a scheduling boundary. [begin, end) includes all instructions in
741/// the region, including the boundary itself and single-instruction regions
742/// that don't get scheduled.
746 unsigned regioninstrs)
747{
748 ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
749
750 SchedImpl->initPolicy(begin, end, regioninstrs);
751}
752
753/// This is normally called from the main scheduler loop but may also be invoked
754/// by the scheduling strategy to perform additional code motion.
757 // Advance RegionBegin if the first instruction moves down.
758 if (&*RegionBegin == MI)
759 ++RegionBegin;
760
761 // Update the instruction stream.
762 BB->splice(InsertPos, BB, MI);
763
764 // Update LiveIntervals
765 if (LIS)
766 LIS->handleMove(*MI, /*UpdateFlags=*/true);
767
768 // Recede RegionBegin if an instruction moves above the first.
769 if (RegionBegin == InsertPos)
770 RegionBegin = MI;
771}
772
774#if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
775 if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
777 return false;
778 }
779 ++NumInstrsScheduled;
780#endif
781 return true;
782}
783
784/// Per-region scheduling driver, called back from
785/// MachineScheduler::runOnMachineFunction. This is a simplified driver that
786/// does not consider liveness or register pressure. It is useful for PostRA
787/// scheduling and potentially other custom schedulers.
789 LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
790 LLVM_DEBUG(SchedImpl->dumpPolicy());
791
792 // Build the DAG.
794
796
797 SmallVector<SUnit*, 8> TopRoots, BotRoots;
798 findRootsAndBiasEdges(TopRoots, BotRoots);
799
800 LLVM_DEBUG(dump());
801 if (PrintDAGs) dump();
803
804 // Initialize the strategy before modifying the DAG.
805 // This may initialize a DFSResult to be used for queue priority.
806 SchedImpl->initialize(this);
807
808 // Initialize ready queues now that the DAG and priority data are finalized.
809 initQueues(TopRoots, BotRoots);
810
811 bool IsTopNode = false;
812 while (true) {
813 LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
814 SUnit *SU = SchedImpl->pickNode(IsTopNode);
815 if (!SU) break;
816
817 assert(!SU->isScheduled && "Node already scheduled");
818 if (!checkSchedLimit())
819 break;
820
821 MachineInstr *MI = SU->getInstr();
822 if (IsTopNode) {
823 assert(SU->isTopReady() && "node still has unscheduled dependencies");
824 if (&*CurrentTop == MI)
826 else
828 } else {
829 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
832 if (&*priorII == MI)
833 CurrentBottom = priorII;
834 else {
835 if (&*CurrentTop == MI)
836 CurrentTop = nextIfDebug(++CurrentTop, priorII);
839 }
840 }
841 // Notify the scheduling strategy before updating the DAG.
842 // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
843 // runs, it can then use the accurate ReadyCycle time to determine whether
844 // newly released nodes can move to the readyQ.
845 SchedImpl->schedNode(SU, IsTopNode);
846
847 updateQueues(SU, IsTopNode);
848 }
849 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
850
852
853 LLVM_DEBUG({
854 dbgs() << "*** Final schedule for "
855 << printMBBReference(*begin()->getParent()) << " ***\n";
856 dumpSchedule();
857 dbgs() << '\n';
858 });
859}
860
861/// Apply each ScheduleDAGMutation step in order.
863 for (auto &m : Mutations)
864 m->apply(this);
865}
866
869 SmallVectorImpl<SUnit*> &BotRoots) {
870 for (SUnit &SU : SUnits) {
871 assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
872
873 // Order predecessors so DFSResult follows the critical path.
874 SU.biasCriticalPath();
875
876 // A SUnit is ready to top schedule if it has no predecessors.
877 if (!SU.NumPredsLeft)
878 TopRoots.push_back(&SU);
879 // A SUnit is ready to bottom schedule if it has no successors.
880 if (!SU.NumSuccsLeft)
881 BotRoots.push_back(&SU);
882 }
884}
885
886/// Identify DAG roots and setup scheduler queues.
888 ArrayRef<SUnit*> BotRoots) {
889 NextClusterSucc = nullptr;
890 NextClusterPred = nullptr;
891
892 // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
893 //
894 // Nodes with unreleased weak edges can still be roots.
895 // Release top roots in forward order.
896 for (SUnit *SU : TopRoots)
897 SchedImpl->releaseTopNode(SU);
898
899 // Release bottom roots in reverse order so the higher priority nodes appear
900 // first. This is more natural and slightly more efficient.
902 I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
903 SchedImpl->releaseBottomNode(*I);
904 }
905
908
909 SchedImpl->registerRoots();
910
911 // Advance past initial DebugValues.
914}
915
916/// Update scheduler queues after scheduling an instruction.
917void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
918 // Release dependent instructions for scheduling.
919 if (IsTopNode)
921 else
923
924 SU->isScheduled = true;
925}
926
927/// Reinsert any remaining debug_values, just like the PostRA scheduler.
929 // If first instruction was a DBG_VALUE then put it back.
930 if (FirstDbgValue) {
933 }
934
935 for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
936 DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
937 std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
938 MachineInstr *DbgValue = P.first;
939 MachineBasicBlock::iterator OrigPrevMI = P.second;
940 if (&*RegionBegin == DbgValue)
941 ++RegionBegin;
942 BB->splice(std::next(OrigPrevMI), BB, DbgValue);
943 if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
945 }
946}
947
948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
949static const char *scheduleTableLegend = " i: issue\n x: resource booked";
950
952 // Nothing to show if there is no or just one instruction.
953 if (BB->size() < 2)
954 return;
955
956 dbgs() << " * Schedule table (TopDown):\n";
957 dbgs() << scheduleTableLegend << "\n";
958 const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
959 unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
960 for (MachineInstr &MI : *this) {
961 SUnit *SU = getSUnit(&MI);
962 if (!SU)
963 continue;
964 const MCSchedClassDesc *SC = getSchedClass(SU);
967 PI != PE; ++PI) {
968 if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
969 LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
970 }
971 }
972 // Print the header with the cycles
974 for (unsigned C = FirstCycle; C <= LastCycle; ++C)
975 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
976 dbgs() << "|\n";
977
978 for (MachineInstr &MI : *this) {
979 SUnit *SU = getSUnit(&MI);
980 if (!SU) {
981 dbgs() << "Missing SUnit\n";
982 continue;
983 }
984 std::string NodeName("SU(");
985 NodeName += std::to_string(SU->NodeNum) + ")";
986 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
987 unsigned C = FirstCycle;
988 for (; C <= LastCycle; ++C) {
989 if (C == SU->TopReadyCycle)
990 dbgs() << llvm::left_justify("| i", ColWidth);
991 else
993 }
994 dbgs() << "|\n";
995 const MCSchedClassDesc *SC = getSchedClass(SU);
998 PI != PE; ++PI) {
999 C = FirstCycle;
1000 const std::string ResName =
1001 SchedModel.getResourceName(PI->ProcResourceIdx);
1002 dbgs() << llvm::left_justify(ResName, HeaderColWidth);
1003 for (; C < SU->TopReadyCycle; ++C) {
1004 dbgs() << llvm::left_justify("|", ColWidth);
1005 }
1006 for (unsigned i = 0; i < PI->Cycles; ++i, ++C)
1007 dbgs() << llvm::left_justify("| x", ColWidth);
1008 while (C++ <= LastCycle)
1009 dbgs() << llvm::left_justify("|", ColWidth);
1010 // Place end char
1011 dbgs() << "| \n";
1012 }
1013 }
1014}
1015
1017 // Nothing to show if there is no or just one instruction.
1018 if (BB->size() < 2)
1019 return;
1020
1021 dbgs() << " * Schedule table (BottomUp):\n";
1022 dbgs() << scheduleTableLegend << "\n";
1023
1024 const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
1025 int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
1026 for (MachineInstr &MI : *this) {
1027 SUnit *SU = getSUnit(&MI);
1028 if (!SU)
1029 continue;
1030 const MCSchedClassDesc *SC = getSchedClass(SU);
1033 PI != PE; ++PI) {
1034 if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
1035 LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
1036 }
1037 }
1038 // Print the header with the cycles
1039 dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
1040 for (int C = FirstCycle; C >= LastCycle; --C)
1041 dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
1042 dbgs() << "|\n";
1043
1044 for (MachineInstr &MI : *this) {
1045 SUnit *SU = getSUnit(&MI);
1046 if (!SU) {
1047 dbgs() << "Missing SUnit\n";
1048 continue;
1049 }
1050 std::string NodeName("SU(");
1051 NodeName += std::to_string(SU->NodeNum) + ")";
1052 dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
1053 int C = FirstCycle;
1054 for (; C >= LastCycle; --C) {
1055 if (C == (int)SU->BotReadyCycle)
1056 dbgs() << llvm::left_justify("| i", ColWidth);
1057 else
1058 dbgs() << llvm::left_justify("|", ColWidth);
1059 }
1060 dbgs() << "|\n";
1061 const MCSchedClassDesc *SC = getSchedClass(SU);
1064 PI != PE; ++PI) {
1065 C = FirstCycle;
1066 const std::string ResName =
1067 SchedModel.getResourceName(PI->ProcResourceIdx);
1068 dbgs() << llvm::left_justify(ResName, HeaderColWidth);
1069 for (; C > (int)SU->BotReadyCycle; --C) {
1070 dbgs() << llvm::left_justify("|", ColWidth);
1071 }
1072 for (unsigned i = 0; i < PI->Cycles; ++i, --C)
1073 dbgs() << llvm::left_justify("| x", ColWidth);
1074 while (C-- >= LastCycle)
1075 dbgs() << llvm::left_justify("|", ColWidth);
1076 // Place end char
1077 dbgs() << "| \n";
1078 }
1079 }
1080}
1081#endif
1082
1083#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1086 if (ForceTopDown)
1088 else if (ForceBottomUp)
1090 else {
1091 dbgs() << "* Schedule table (Bidirectional): not implemented\n";
1092 }
1093 }
1094
1095 for (MachineInstr &MI : *this) {
1096 if (SUnit *SU = getSUnit(&MI))
1097 dumpNode(*SU);
1098 else
1099 dbgs() << "Missing SUnit\n";
1100 }
1101}
1102#endif
1103
1104//===----------------------------------------------------------------------===//
1105// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
1106// preservation.
1107//===----------------------------------------------------------------------===//
1108
1110 delete DFSResult;
1111}
1112
1114 const MachineInstr &MI = *SU.getInstr();
1115 for (const MachineOperand &MO : MI.operands()) {
1116 if (!MO.isReg())
1117 continue;
1118 if (!MO.readsReg())
1119 continue;
1120 if (TrackLaneMasks && !MO.isUse())
1121 continue;
1122
1123 Register Reg = MO.getReg();
1124 if (!Reg.isVirtual())
1125 continue;
1126
1127 // Ignore re-defs.
1128 if (TrackLaneMasks) {
1129 bool FoundDef = false;
1130 for (const MachineOperand &MO2 : MI.operands()) {
1131 if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
1132 FoundDef = true;
1133 break;
1134 }
1135 }
1136 if (FoundDef)
1137 continue;
1138 }
1139
1140 // Record this local VReg use.
1142 for (; UI != VRegUses.end(); ++UI) {
1143 if (UI->SU == &SU)
1144 break;
1145 }
1146 if (UI == VRegUses.end())
1148 }
1149}
1150
1151/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
1152/// crossing a scheduling boundary. [begin, end) includes all instructions in
1153/// the region, including the boundary itself and single-instruction regions
1154/// that don't get scheduled.
1158 unsigned regioninstrs)
1159{
1160 // ScheduleDAGMI initializes SchedImpl's per-region policy.
1161 ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
1162
1163 // For convenience remember the end of the liveness region.
1164 LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1165
1167
1168 ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1169 ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1170
1172 "ShouldTrackLaneMasks requires ShouldTrackPressure");
1173}
1174
1175// Setup the register pressure trackers for the top scheduled and bottom
1176// scheduled regions.
1178 VRegUses.clear();
1180 for (SUnit &SU : SUnits)
1181 collectVRegUses(SU);
1182
1184 ShouldTrackLaneMasks, false);
1186 ShouldTrackLaneMasks, false);
1187
1188 // Close the RPTracker to finalize live ins.
1190
1192
1193 // Initialize the live ins and live outs.
1196
1197 // Close one end of the tracker so we can call
1198 // getMaxUpward/DownwardPressureDelta before advancing across any
1199 // instructions. This converts currently live regs into live ins/outs.
1202
1204 if (!BotRPTracker.getLiveThru().empty()) {
1206 LLVM_DEBUG(dbgs() << "Live Thru: ";
1208 };
1209
1210 // For each live out vreg reduce the pressure change associated with other
1211 // uses of the same vreg below the live-out reaching def.
1213
1214 // Account for liveness generated by the region boundary.
1215 if (LiveRegionEnd != RegionEnd) {
1217 BotRPTracker.recede(&LiveUses);
1218 updatePressureDiffs(LiveUses);
1219 }
1220
1221 LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1223 dbgs() << "Bottom Pressure:\n";
1225
1227 (RegionEnd->isDebugInstr() &&
1229 "Can't find the region bottom");
1230
1231 // Cache the list of excess pressure sets in this region. This will also track
1232 // the max pressure in the scheduled code for these sets.
1233 RegionCriticalPSets.clear();
1234 const std::vector<unsigned> &RegionPressure =
1236 for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1237 unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1238 if (RegionPressure[i] > Limit) {
1239 LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1240 << " Actual " << RegionPressure[i] << "\n");
1241 RegionCriticalPSets.push_back(PressureChange(i));
1242 }
1243 }
1244 LLVM_DEBUG(dbgs() << "Excess PSets: ";
1245 for (const PressureChange &RCPS
1247 << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1248 dbgs() << "\n");
1249}
1250
1253 const std::vector<unsigned> &NewMaxPressure) {
1254 const PressureDiff &PDiff = getPressureDiff(SU);
1255 unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1256 for (const PressureChange &PC : PDiff) {
1257 if (!PC.isValid())
1258 break;
1259 unsigned ID = PC.getPSet();
1260 while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1261 ++CritIdx;
1262 if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1263 if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1264 && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1265 RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1266 }
1267 unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1268 if (NewMaxPressure[ID] >= Limit - 2) {
1269 LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1270 << NewMaxPressure[ID]
1271 << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1272 << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1273 << " livethru)\n");
1274 }
1275 }
1276}
1277
1278/// Update the PressureDiff array for liveness after scheduling this
1279/// instruction.
1281 ArrayRef<RegisterMaskPair> LiveUses) {
1282 for (const RegisterMaskPair &P : LiveUses) {
1283 Register Reg = P.RegUnit;
1284 /// FIXME: Currently assuming single-use physregs.
1285 if (!Reg.isVirtual())
1286 continue;
1287
1289 // If the register has just become live then other uses won't change
1290 // this fact anymore => decrement pressure.
1291 // If the register has just become dead then other uses make it come
1292 // back to life => increment pressure.
1293 bool Decrement = P.LaneMask.any();
1294
1295 for (const VReg2SUnit &V2SU
1296 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1297 SUnit &SU = *V2SU.SU;
1298 if (SU.isScheduled || &SU == &ExitSU)
1299 continue;
1300
1301 PressureDiff &PDiff = getPressureDiff(&SU);
1302 PDiff.addPressureChange(Reg, Decrement, &MRI);
1303 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1304 << printReg(Reg, TRI) << ':'
1305 << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1306 dbgs() << " to "; PDiff.dump(*TRI););
1307 }
1308 } else {
1309 assert(P.LaneMask.any());
1310 LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1311 // This may be called before CurrentBottom has been initialized. However,
1312 // BotRPTracker must have a valid position. We want the value live into the
1313 // instruction or live out of the block, so ask for the previous
1314 // instruction's live-out.
1315 const LiveInterval &LI = LIS->getInterval(Reg);
1316 VNInfo *VNI;
1319 if (I == BB->end())
1320 VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1321 else {
1323 VNI = LRQ.valueIn();
1324 }
1325 // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1326 assert(VNI && "No live value at use.");
1327 for (const VReg2SUnit &V2SU
1328 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1329 SUnit *SU = V2SU.SU;
1330 // If this use comes before the reaching def, it cannot be a last use,
1331 // so decrease its pressure change.
1332 if (!SU->isScheduled && SU != &ExitSU) {
1333 LiveQueryResult LRQ =
1335 if (LRQ.valueIn() == VNI) {
1336 PressureDiff &PDiff = getPressureDiff(SU);
1337 PDiff.addPressureChange(Reg, true, &MRI);
1338 LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1339 << *SU->getInstr();
1340 dbgs() << " to "; PDiff.dump(*TRI););
1341 }
1342 }
1343 }
1344 }
1345 }
1346}
1347
1349#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1350 if (EntrySU.getInstr() != nullptr)
1352 for (const SUnit &SU : SUnits) {
1353 dumpNodeAll(SU);
1354 if (ShouldTrackPressure) {
1355 dbgs() << " Pressure Diff : ";
1356 getPressureDiff(&SU).dump(*TRI);
1357 }
1358 dbgs() << " Single Issue : ";
1359 if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1360 SchedModel.mustEndGroup(SU.getInstr()))
1361 dbgs() << "true;";
1362 else
1363 dbgs() << "false;";
1364 dbgs() << '\n';
1365 }
1366 if (ExitSU.getInstr() != nullptr)
1368#endif
1369}
1370
1371/// schedule - Called back from MachineScheduler::runOnMachineFunction
1372/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1373/// only includes instructions that have DAG nodes, not scheduling boundaries.
1374///
1375/// This is a skeletal driver, with all the functionality pushed into helpers,
1376/// so that it can be easily extended by experimental schedulers. Generally,
1377/// implementing MachineSchedStrategy should be sufficient to implement a new
1378/// scheduling algorithm. However, if a scheduler further subclasses
1379/// ScheduleDAGMILive then it will want to override this virtual method in order
1380/// to update any specialized state.
1382 LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1383 LLVM_DEBUG(SchedImpl->dumpPolicy());
1385
1387
1388 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1389 findRootsAndBiasEdges(TopRoots, BotRoots);
1390
1391 // Initialize the strategy before modifying the DAG.
1392 // This may initialize a DFSResult to be used for queue priority.
1393 SchedImpl->initialize(this);
1394
1395 LLVM_DEBUG(dump());
1396 if (PrintDAGs) dump();
1398
1399 // Initialize ready queues now that the DAG and priority data are finalized.
1400 initQueues(TopRoots, BotRoots);
1401
1402 bool IsTopNode = false;
1403 while (true) {
1404 LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1405 SUnit *SU = SchedImpl->pickNode(IsTopNode);
1406 if (!SU) break;
1407
1408 assert(!SU->isScheduled && "Node already scheduled");
1409 if (!checkSchedLimit())
1410 break;
1411
1412 scheduleMI(SU, IsTopNode);
1413
1414 if (DFSResult) {
1415 unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1416 if (!ScheduledTrees.test(SubtreeID)) {
1417 ScheduledTrees.set(SubtreeID);
1418 DFSResult->scheduleTree(SubtreeID);
1419 SchedImpl->scheduleTree(SubtreeID);
1420 }
1421 }
1422
1423 // Notify the scheduling strategy after updating the DAG.
1424 SchedImpl->schedNode(SU, IsTopNode);
1425
1426 updateQueues(SU, IsTopNode);
1427 }
1428 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1429
1431
1432 LLVM_DEBUG({
1433 dbgs() << "*** Final schedule for "
1434 << printMBBReference(*begin()->getParent()) << " ***\n";
1435 dumpSchedule();
1436 dbgs() << '\n';
1437 });
1438}
1439
1440/// Build the DAG and setup three register pressure trackers.
1442 if (!ShouldTrackPressure) {
1443 RPTracker.reset();
1444 RegionCriticalPSets.clear();
1446 return;
1447 }
1448
1449 // Initialize the register pressure tracker used by buildSchedGraph.
1451 ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1452
1453 // Account for liveness generate by the region boundary.
1454 if (LiveRegionEnd != RegionEnd)
1455 RPTracker.recede();
1456
1457 // Build the DAG, and compute current register pressure.
1459
1460 // Initialize top/bottom trackers after computing region pressure.
1462}
1463
1465 if (!DFSResult)
1466 DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1467 DFSResult->clear();
1469 DFSResult->resize(SUnits.size());
1472}
1473
1474/// Compute the max cyclic critical path through the DAG. The scheduling DAG
1475/// only provides the critical path for single block loops. To handle loops that
1476/// span blocks, we could use the vreg path latencies provided by
1477/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1478/// available for use in the scheduler.
1479///
1480/// The cyclic path estimation identifies a def-use pair that crosses the back
1481/// edge and considers the depth and height of the nodes. For example, consider
1482/// the following instruction sequence where each instruction has unit latency
1483/// and defines an eponymous virtual register:
1484///
1485/// a->b(a,c)->c(b)->d(c)->exit
1486///
1487/// The cyclic critical path is a two cycles: b->c->b
1488/// The acyclic critical path is four cycles: a->b->c->d->exit
1489/// LiveOutHeight = height(c) = len(c->d->exit) = 2
1490/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1491/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1492/// LiveInDepth = depth(b) = len(a->b) = 1
1493///
1494/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1495/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1496/// CyclicCriticalPath = min(2, 2) = 2
1497///
1498/// This could be relevant to PostRA scheduling, but is currently implemented
1499/// assuming LiveIntervals.
1501 // This only applies to single block loop.
1502 if (!BB->isSuccessor(BB))
1503 return 0;
1504
1505 unsigned MaxCyclicLatency = 0;
1506 // Visit each live out vreg def to find def/use pairs that cross iterations.
1508 Register Reg = P.RegUnit;
1509 if (!Reg.isVirtual())
1510 continue;
1511 const LiveInterval &LI = LIS->getInterval(Reg);
1512 const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1513 if (!DefVNI)
1514 continue;
1515
1517 const SUnit *DefSU = getSUnit(DefMI);
1518 if (!DefSU)
1519 continue;
1520
1521 unsigned LiveOutHeight = DefSU->getHeight();
1522 unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1523 // Visit all local users of the vreg def.
1524 for (const VReg2SUnit &V2SU
1525 : make_range(VRegUses.find(Reg), VRegUses.end())) {
1526 SUnit *SU = V2SU.SU;
1527 if (SU == &ExitSU)
1528 continue;
1529
1530 // Only consider uses of the phi.
1532 if (!LRQ.valueIn()->isPHIDef())
1533 continue;
1534
1535 // Assume that a path spanning two iterations is a cycle, which could
1536 // overestimate in strange cases. This allows cyclic latency to be
1537 // estimated as the minimum slack of the vreg's depth or height.
1538 unsigned CyclicLatency = 0;
1539 if (LiveOutDepth > SU->getDepth())
1540 CyclicLatency = LiveOutDepth - SU->getDepth();
1541
1542 unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1543 if (LiveInHeight > LiveOutHeight) {
1544 if (LiveInHeight - LiveOutHeight < CyclicLatency)
1545 CyclicLatency = LiveInHeight - LiveOutHeight;
1546 } else
1547 CyclicLatency = 0;
1548
1549 LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1550 << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1551 if (CyclicLatency > MaxCyclicLatency)
1552 MaxCyclicLatency = CyclicLatency;
1553 }
1554 }
1555 LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1556 return MaxCyclicLatency;
1557}
1558
1559/// Release ExitSU predecessors and setup scheduler queues. Re-position
1560/// the Top RP tracker in case the region beginning has changed.
1562 ArrayRef<SUnit*> BotRoots) {
1563 ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1564 if (ShouldTrackPressure) {
1565 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1567 }
1568}
1569
1570/// Move an instruction and update register pressure.
1571void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1572 // Move the instruction to its new location in the instruction stream.
1573 MachineInstr *MI = SU->getInstr();
1574
1575 if (IsTopNode) {
1576 assert(SU->isTopReady() && "node still has unscheduled dependencies");
1577 if (&*CurrentTop == MI)
1579 else {
1582 }
1583
1584 if (ShouldTrackPressure) {
1585 // Update top scheduled pressure.
1586 RegisterOperands RegOpers;
1587 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1589 // Adjust liveness and add missing dead+read-undef flags.
1590 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1591 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1592 } else {
1593 // Adjust for missing dead-def flags.
1594 RegOpers.detectDeadDefs(*MI, *LIS);
1595 }
1596
1597 TopRPTracker.advance(RegOpers);
1598 assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1599 LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1601
1603 }
1604 } else {
1605 assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1608 if (&*priorII == MI)
1609 CurrentBottom = priorII;
1610 else {
1611 if (&*CurrentTop == MI) {
1612 CurrentTop = nextIfDebug(++CurrentTop, priorII);
1614 }
1616 CurrentBottom = MI;
1618 }
1619 if (ShouldTrackPressure) {
1620 RegisterOperands RegOpers;
1621 RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1623 // Adjust liveness and add missing dead+read-undef flags.
1624 SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1625 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1626 } else {
1627 // Adjust for missing dead-def flags.
1628 RegOpers.detectDeadDefs(*MI, *LIS);
1629 }
1630
1634 BotRPTracker.recede(RegOpers, &LiveUses);
1635 assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1636 LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1638
1640 updatePressureDiffs(LiveUses);
1641 }
1642 }
1643}
1644
1645//===----------------------------------------------------------------------===//
1646// BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1647//===----------------------------------------------------------------------===//
1648
1649namespace {
1650
1651/// Post-process the DAG to create cluster edges between neighboring
1652/// loads or between neighboring stores.
1653class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1654 struct MemOpInfo {
1655 SUnit *SU;
1657 int64_t Offset;
1658 unsigned Width;
1659
1660 MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1661 int64_t Offset, unsigned Width)
1662 : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1663 Width(Width) {}
1664
1665 static bool Compare(const MachineOperand *const &A,
1666 const MachineOperand *const &B) {
1667 if (A->getType() != B->getType())
1668 return A->getType() < B->getType();
1669 if (A->isReg())
1670 return A->getReg() < B->getReg();
1671 if (A->isFI()) {
1672 const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1674 bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1676 return StackGrowsDown ? A->getIndex() > B->getIndex()
1677 : A->getIndex() < B->getIndex();
1678 }
1679
1680 llvm_unreachable("MemOpClusterMutation only supports register or frame "
1681 "index bases.");
1682 }
1683
1684 bool operator<(const MemOpInfo &RHS) const {
1685 // FIXME: Don't compare everything twice. Maybe use C++20 three way
1686 // comparison instead when it's available.
1687 if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1688 RHS.BaseOps.begin(), RHS.BaseOps.end(),
1689 Compare))
1690 return true;
1691 if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1692 BaseOps.begin(), BaseOps.end(), Compare))
1693 return false;
1694 if (Offset != RHS.Offset)
1695 return Offset < RHS.Offset;
1696 return SU->NodeNum < RHS.SU->NodeNum;
1697 }
1698 };
1699
1700 const TargetInstrInfo *TII;
1701 const TargetRegisterInfo *TRI;
1702 bool IsLoad;
1703
1704public:
1705 BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1706 const TargetRegisterInfo *tri, bool IsLoad)
1707 : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1708
1709 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1710
1711protected:
1712 void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1713 ScheduleDAGInstrs *DAG);
1714 void collectMemOpRecords(std::vector<SUnit> &SUnits,
1715 SmallVectorImpl<MemOpInfo> &MemOpRecords);
1716 bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1718};
1719
1720class StoreClusterMutation : public BaseMemOpClusterMutation {
1721public:
1722 StoreClusterMutation(const TargetInstrInfo *tii,
1723 const TargetRegisterInfo *tri)
1724 : BaseMemOpClusterMutation(tii, tri, false) {}
1725};
1726
1727class LoadClusterMutation : public BaseMemOpClusterMutation {
1728public:
1729 LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1730 : BaseMemOpClusterMutation(tii, tri, true) {}
1731};
1732
1733} // end anonymous namespace
1734
1735namespace llvm {
1736
1737std::unique_ptr<ScheduleDAGMutation>
1739 const TargetRegisterInfo *TRI) {
1740 return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1741 : nullptr;
1742}
1743
1744std::unique_ptr<ScheduleDAGMutation>
1746 const TargetRegisterInfo *TRI) {
1747 return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1748 : nullptr;
1749}
1750
1751} // end namespace llvm
1752
1753// Sorting all the loads/stores first, then for each load/store, checking the
1754// following load/store one by one, until reach the first non-dependent one and
1755// call target hook to see if they can cluster.
1756// If FastCluster is enabled, we assume that, all the loads/stores have been
1757// preprocessed and now, they didn't have dependencies on each other.
1758void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1759 ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1760 ScheduleDAGInstrs *DAG) {
1761 // Keep track of the current cluster length and bytes for each SUnit.
1763
1764 // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
1765 // cluster mem ops collected within `MemOpRecords` array.
1766 for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1767 // Decision to cluster mem ops is taken based on target dependent logic
1768 auto MemOpa = MemOpRecords[Idx];
1769
1770 // Seek for the next load/store to do the cluster.
1771 unsigned NextIdx = Idx + 1;
1772 for (; NextIdx < End; ++NextIdx)
1773 // Skip if MemOpb has been clustered already or has dependency with
1774 // MemOpa.
1775 if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1776 (FastCluster ||
1777 (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1778 !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1779 break;
1780 if (NextIdx == End)
1781 continue;
1782
1783 auto MemOpb = MemOpRecords[NextIdx];
1784 unsigned ClusterLength = 2;
1785 unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1786 if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1787 ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1788 CurrentClusterBytes =
1789 SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1790 }
1791
1792 if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1793 CurrentClusterBytes))
1794 continue;
1795
1796 SUnit *SUa = MemOpa.SU;
1797 SUnit *SUb = MemOpb.SU;
1798 if (SUa->NodeNum > SUb->NodeNum)
1799 std::swap(SUa, SUb);
1800
1801 // FIXME: Is this check really required?
1802 if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1803 continue;
1804
1805 LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1806 << SUb->NodeNum << ")\n");
1807 ++NumClustered;
1808
1809 if (IsLoad) {
1810 // Copy successor edges from SUa to SUb. Interleaving computation
1811 // dependent on SUa can prevent load combining due to register reuse.
1812 // Predecessor edges do not need to be copied from SUb to SUa since
1813 // nearby loads should have effectively the same inputs.
1814 for (const SDep &Succ : SUa->Succs) {
1815 if (Succ.getSUnit() == SUb)
1816 continue;
1817 LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1818 << ")\n");
1819 DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1820 }
1821 } else {
1822 // Copy predecessor edges from SUb to SUa to avoid the SUnits that
1823 // SUb dependent on scheduled in-between SUb and SUa. Successor edges
1824 // do not need to be copied from SUa to SUb since no one will depend
1825 // on stores.
1826 // Notice that, we don't need to care about the memory dependency as
1827 // we won't try to cluster them if they have any memory dependency.
1828 for (const SDep &Pred : SUb->Preds) {
1829 if (Pred.getSUnit() == SUa)
1830 continue;
1831 LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1832 << ")\n");
1833 DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1834 }
1835 }
1836
1837 SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1838 CurrentClusterBytes};
1839
1840 LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1841 << ", Curr cluster bytes: " << CurrentClusterBytes
1842 << "\n");
1843 }
1844}
1845
1846void BaseMemOpClusterMutation::collectMemOpRecords(
1847 std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1848 for (auto &SU : SUnits) {
1849 if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1850 (!IsLoad && !SU.getInstr()->mayStore()))
1851 continue;
1852
1853 const MachineInstr &MI = *SU.getInstr();
1855 int64_t Offset;
1856 bool OffsetIsScalable;
1857 unsigned Width;
1859 OffsetIsScalable, Width, TRI)) {
1860 MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1861
1862 LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1863 << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1864 << ", Width: " << Width << "\n");
1865 }
1866#ifndef NDEBUG
1867 for (const auto *Op : BaseOps)
1868 assert(Op);
1869#endif
1870 }
1871}
1872
1873bool BaseMemOpClusterMutation::groupMemOps(
1876 bool FastCluster =
1878 MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1879
1880 for (const auto &MemOp : MemOps) {
1881 unsigned ChainPredID = DAG->SUnits.size();
1882 if (FastCluster) {
1883 for (const SDep &Pred : MemOp.SU->Preds) {
1884 // We only want to cluster the mem ops that have the same ctrl(non-data)
1885 // pred so that they didn't have ctrl dependency for each other. But for
1886 // store instrs, we can still cluster them if the pred is load instr.
1887 if ((Pred.isCtrl() &&
1888 (IsLoad ||
1889 (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1890 !Pred.isArtificial()) {
1891 ChainPredID = Pred.getSUnit()->NodeNum;
1892 break;
1893 }
1894 }
1895 } else
1896 ChainPredID = 0;
1897
1898 Groups[ChainPredID].push_back(MemOp);
1899 }
1900 return FastCluster;
1901}
1902
1903/// Callback from DAG postProcessing to create cluster edges for loads/stores.
1904void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1905 // Collect all the clusterable loads/stores
1906 SmallVector<MemOpInfo, 32> MemOpRecords;
1907 collectMemOpRecords(DAG->SUnits, MemOpRecords);
1908
1909 if (MemOpRecords.size() < 2)
1910 return;
1911
1912 // Put the loads/stores without dependency into the same group with some
1913 // heuristic if the DAG is too complex to avoid compiling time blow up.
1914 // Notice that, some fusion pair could be lost with this.
1916 bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1917
1918 for (auto &Group : Groups) {
1919 // Sorting the loads/stores, so that, we can stop the cluster as early as
1920 // possible.
1921 llvm::sort(Group.second);
1922
1923 // Trying to cluster all the neighboring loads/stores.
1924 clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1925 }
1926}
1927
1928//===----------------------------------------------------------------------===//
1929// CopyConstrain - DAG post-processing to encourage copy elimination.
1930//===----------------------------------------------------------------------===//
1931
1932namespace {
1933
1934/// Post-process the DAG to create weak edges from all uses of a copy to
1935/// the one use that defines the copy's source vreg, most likely an induction
1936/// variable increment.
1937class CopyConstrain : public ScheduleDAGMutation {
1938 // Transient state.
1939 SlotIndex RegionBeginIdx;
1940
1941 // RegionEndIdx is the slot index of the last non-debug instruction in the
1942 // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1943 SlotIndex RegionEndIdx;
1944
1945public:
1946 CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1947
1948 void apply(ScheduleDAGInstrs *DAGInstrs) override;
1949
1950protected:
1951 void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1952};
1953
1954} // end anonymous namespace
1955
1956namespace llvm {
1957
1958std::unique_ptr<ScheduleDAGMutation>
1960 const TargetRegisterInfo *TRI) {
1961 return std::make_unique<CopyConstrain>(TII, TRI);
1962}
1963
1964} // end namespace llvm
1965
1966/// constrainLocalCopy handles two possibilities:
1967/// 1) Local src:
1968/// I0: = dst
1969/// I1: src = ...
1970/// I2: = dst
1971/// I3: dst = src (copy)
1972/// (create pred->succ edges I0->I1, I2->I1)
1973///
1974/// 2) Local copy:
1975/// I0: dst = src (copy)
1976/// I1: = dst
1977/// I2: src = ...
1978/// I3: = dst
1979/// (create pred->succ edges I1->I2, I3->I2)
1980///
1981/// Although the MachineScheduler is currently constrained to single blocks,
1982/// this algorithm should handle extended blocks. An EBB is a set of
1983/// contiguously numbered blocks such that the previous block in the EBB is
1984/// always the single predecessor.
1985void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1986 LiveIntervals *LIS = DAG->getLIS();
1987 MachineInstr *Copy = CopySU->getInstr();
1988
1989 // Check for pure vreg copies.
1990 const MachineOperand &SrcOp = Copy->getOperand(1);
1991 Register SrcReg = SrcOp.getReg();
1992 if (!SrcReg.isVirtual() || !SrcOp.readsReg())
1993 return;
1994
1995 const MachineOperand &DstOp = Copy->getOperand(0);
1996 Register DstReg = DstOp.getReg();
1997 if (!DstReg.isVirtual() || DstOp.isDead())
1998 return;
1999
2000 // Check if either the dest or source is local. If it's live across a back
2001 // edge, it's not local. Note that if both vregs are live across the back
2002 // edge, we cannot successfully contrain the copy without cyclic scheduling.
2003 // If both the copy's source and dest are local live intervals, then we
2004 // should treat the dest as the global for the purpose of adding
2005 // constraints. This adds edges from source's other uses to the copy.
2006 unsigned LocalReg = SrcReg;
2007 unsigned GlobalReg = DstReg;
2008 LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
2009 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
2010 LocalReg = DstReg;
2011 GlobalReg = SrcReg;
2012 LocalLI = &LIS->getInterval(LocalReg);
2013 if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
2014 return;
2015 }
2016 LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
2017
2018 // Find the global segment after the start of the local LI.
2019 LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
2020 // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
2021 // local live range. We could create edges from other global uses to the local
2022 // start, but the coalescer should have already eliminated these cases, so
2023 // don't bother dealing with it.
2024 if (GlobalSegment == GlobalLI->end())
2025 return;
2026
2027 // If GlobalSegment is killed at the LocalLI->start, the call to find()
2028 // returned the next global segment. But if GlobalSegment overlaps with
2029 // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
2030 // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
2031 if (GlobalSegment->contains(LocalLI->beginIndex()))
2032 ++GlobalSegment;
2033
2034 if (GlobalSegment == GlobalLI->end())
2035 return;
2036
2037 // Check if GlobalLI contains a hole in the vicinity of LocalLI.
2038 if (GlobalSegment != GlobalLI->begin()) {
2039 // Two address defs have no hole.
2040 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
2041 GlobalSegment->start)) {
2042 return;
2043 }
2044 // If the prior global segment may be defined by the same two-address
2045 // instruction that also defines LocalLI, then can't make a hole here.
2046 if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
2047 LocalLI->beginIndex())) {
2048 return;
2049 }
2050 // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
2051 // it would be a disconnected component in the live range.
2052 assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
2053 "Disconnected LRG within the scheduling region.");
2054 }
2055 MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
2056 if (!GlobalDef)
2057 return;
2058
2059 SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
2060 if (!GlobalSU)
2061 return;
2062
2063 // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
2064 // constraining the uses of the last local def to precede GlobalDef.
2065 SmallVector<SUnit*,8> LocalUses;
2066 const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
2067 MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
2068 SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
2069 for (const SDep &Succ : LastLocalSU->Succs) {
2070 if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
2071 continue;
2072 if (Succ.getSUnit() == GlobalSU)
2073 continue;
2074 if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
2075 return;
2076 LocalUses.push_back(Succ.getSUnit());
2077 }
2078 // Open the top of the GlobalLI hole by constraining any earlier global uses
2079 // to precede the start of LocalLI.
2080 SmallVector<SUnit*,8> GlobalUses;
2081 MachineInstr *FirstLocalDef =
2082 LIS->getInstructionFromIndex(LocalLI->beginIndex());
2083 SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
2084 for (const SDep &Pred : GlobalSU->Preds) {
2085 if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
2086 continue;
2087 if (Pred.getSUnit() == FirstLocalSU)
2088 continue;
2089 if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
2090 return;
2091 GlobalUses.push_back(Pred.getSUnit());
2092 }
2093 LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
2094 // Add the weak edges.
2095 for (SUnit *LU : LocalUses) {
2096 LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
2097 << GlobalSU->NodeNum << ")\n");
2098 DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
2099 }
2100 for (SUnit *GU : GlobalUses) {
2101 LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
2102 << FirstLocalSU->NodeNum << ")\n");
2103 DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
2104 }
2105}
2106
2107/// Callback from DAG postProcessing to create weak edges to encourage
2108/// copy elimination.
2109void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
2110 ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
2111 assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
2112
2113 MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
2114 if (FirstPos == DAG->end())
2115 return;
2116 RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
2117 RegionEndIdx = DAG->getLIS()->getInstructionIndex(
2118 *priorNonDebug(DAG->end(), DAG->begin()));
2119
2120 for (SUnit &SU : DAG->SUnits) {
2121 if (!SU.getInstr()->isCopy())
2122 continue;
2123
2124 constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
2125 }
2126}
2127
2128//===----------------------------------------------------------------------===//
2129// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
2130// and possibly other custom schedulers.
2131//===----------------------------------------------------------------------===//
2132
2133static const unsigned InvalidCycle = ~0U;
2134
2136
2137/// Given a Count of resource usage and a Latency value, return true if a
2138/// SchedBoundary becomes resource limited.
2139/// If we are checking after scheduling a node, we should return true when
2140/// we just reach the resource limit.
2141static bool checkResourceLimit(unsigned LFactor, unsigned Count,
2142 unsigned Latency, bool AfterSchedNode) {
2143 int ResCntFactor = (int)(Count - (Latency * LFactor));
2144 if (AfterSchedNode)
2145 return ResCntFactor >= (int)LFactor;
2146 else
2147 return ResCntFactor > (int)LFactor;
2148}
2149
2151 // A new HazardRec is created for each DAG and owned by SchedBoundary.
2152 // Destroying and reconstructing it is very expensive though. So keep
2153 // invalid, placeholder HazardRecs.
2154 if (HazardRec && HazardRec->isEnabled()) {
2155 delete HazardRec;
2156 HazardRec = nullptr;
2157 }
2158 Available.clear();
2159 Pending.clear();
2160 CheckPending = false;
2161 CurrCycle = 0;
2162 CurrMOps = 0;
2163 MinReadyCycle = std::numeric_limits<unsigned>::max();
2164 ExpectedLatency = 0;
2165 DependentLatency = 0;
2166 RetiredMOps = 0;
2167 MaxExecutedResCount = 0;
2168 ZoneCritResIdx = 0;
2169 IsResourceLimited = false;
2170 ReservedCycles.clear();
2171 ReservedCyclesIndex.clear();
2172 ResourceGroupSubUnitMasks.clear();
2173#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2174 // Track the maximum number of stall cycles that could arise either from the
2175 // latency of a DAG edge or the number of cycles that a processor resource is
2176 // reserved (SchedBoundary::ReservedCycles).
2177 MaxObservedStall = 0;
2178#endif
2179 // Reserve a zero-count for invalid CritResIdx.
2180 ExecutedResCounts.resize(1);
2181 assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2182}
2183
2185init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2186 reset();
2187 if (!SchedModel->hasInstrSchedModel())
2188 return;
2190 for (SUnit &SU : DAG->SUnits) {
2191 const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2192 RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2193 * SchedModel->getMicroOpFactor();
2195 PI = SchedModel->getWriteProcResBegin(SC),
2196 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2197 unsigned PIdx = PI->ProcResourceIdx;
2198 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2199 RemainingCounts[PIdx] += (Factor * PI->Cycles);
2200 }
2201 }
2202}
2203
2205init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2206 reset();
2207 DAG = dag;
2208 SchedModel = smodel;
2209 Rem = rem;
2211 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2212 ReservedCyclesIndex.resize(ResourceCount);
2213 ExecutedResCounts.resize(ResourceCount);
2214 ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2215 unsigned NumUnits = 0;
2216
2217 for (unsigned i = 0; i < ResourceCount; ++i) {
2218 ReservedCyclesIndex[i] = NumUnits;
2219 NumUnits += SchedModel->getProcResource(i)->NumUnits;
2220 if (isUnbufferedGroup(i)) {
2221 auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2222 for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2223 U != UE; ++U)
2224 ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2225 }
2226 }
2227
2228 ReservedCycles.resize(NumUnits, InvalidCycle);
2229 }
2230}
2231
2232/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2233/// these "soft stalls" differently than the hard stall cycles based on CPU
2234/// resources and computed by checkHazard(). A fully in-order model
2235/// (MicroOpBufferSize==0) will not make use of this since instructions are not
2236/// available for scheduling until they are ready. However, a weaker in-order
2237/// model may use this for heuristics. For example, if a processor has in-order
2238/// behavior when reading certain resources, this may come into play.
2240 if (!SU->isUnbuffered)
2241 return 0;
2242
2243 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2244 if (ReadyCycle > CurrCycle)
2245 return ReadyCycle - CurrCycle;
2246 return 0;
2247}
2248
2249/// Compute the next cycle at which the given processor resource unit
2250/// can be scheduled.
2252 unsigned Cycles) {
2253 unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2254 // If this resource has never been used, always return cycle zero.
2255 if (NextUnreserved == InvalidCycle)
2256 return 0;
2257 // For bottom-up scheduling add the cycles needed for the current operation.
2258 if (!isTop())
2259 NextUnreserved += Cycles;
2260 return NextUnreserved;
2261}
2262
2263/// Compute the next cycle at which the given processor resource can be
2264/// scheduled. Returns the next cycle and the index of the processor resource
2265/// instance in the reserved cycles vector.
2266std::pair<unsigned, unsigned>
2268 unsigned Cycles) {
2269
2270 unsigned MinNextUnreserved = InvalidCycle;
2271 unsigned InstanceIdx = 0;
2272 unsigned StartIndex = ReservedCyclesIndex[PIdx];
2273 unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2274 assert(NumberOfInstances > 0 &&
2275 "Cannot have zero instances of a ProcResource");
2276
2277 if (isUnbufferedGroup(PIdx)) {
2278 // If any subunits are used by the instruction, report that the resource
2279 // group is available at 0, effectively removing the group record from
2280 // hazarding and basing the hazarding decisions on the subunit records.
2281 // Otherwise, choose the first available instance from among the subunits.
2282 // Specifications which assign cycles to both the subunits and the group or
2283 // which use an unbuffered group with buffered subunits will appear to
2284 // schedule strangely. In the first case, the additional cycles for the
2285 // group will be ignored. In the second, the group will be ignored
2286 // entirely.
2287 for (const MCWriteProcResEntry &PE :
2290 if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2291 return std::make_pair(0u, StartIndex);
2292
2293 auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2294 for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2295 unsigned NextUnreserved, NextInstanceIdx;
2296 std::tie(NextUnreserved, NextInstanceIdx) =
2297 getNextResourceCycle(SC, SubUnits[I], Cycles);
2298 if (MinNextUnreserved > NextUnreserved) {
2299 InstanceIdx = NextInstanceIdx;
2300 MinNextUnreserved = NextUnreserved;
2301 }
2302 }
2303 return std::make_pair(MinNextUnreserved, InstanceIdx);
2304 }
2305
2306 for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2307 ++I) {
2308 unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2309 if (MinNextUnreserved > NextUnreserved) {
2310 InstanceIdx = I;
2311 MinNextUnreserved = NextUnreserved;
2312 }
2313 }
2314 return std::make_pair(MinNextUnreserved, InstanceIdx);
2315}
2316
2317/// Does this SU have a hazard within the current instruction group.
2318///
2319/// The scheduler supports two modes of hazard recognition. The first is the
2320/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2321/// supports highly complicated in-order reservation tables
2322/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2323///
2324/// The second is a streamlined mechanism that checks for hazards based on
2325/// simple counters that the scheduler itself maintains. It explicitly checks
2326/// for instruction dispatch limitations, including the number of micro-ops that
2327/// can dispatch per cycle.
2328///
2329/// TODO: Also check whether the SU must start a new group.
2331 if (HazardRec->isEnabled()
2333 return true;
2334 }
2335
2336 unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2337 if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2338 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2339 << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2340 return true;
2341 }
2342
2343 if (CurrMOps > 0 &&
2344 ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2345 (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2346 LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2347 << (isTop() ? "begin" : "end") << " group\n");
2348 return true;
2349 }
2350
2352 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2353 for (const MCWriteProcResEntry &PE :
2356 unsigned ResIdx = PE.ProcResourceIdx;
2357 unsigned Cycles = PE.Cycles;
2358 unsigned NRCycle, InstanceIdx;
2359 std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2360 if (NRCycle > CurrCycle) {
2361#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2362 MaxObservedStall = std::max(Cycles, MaxObservedStall);
2363#endif
2364 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2365 << SchedModel->getResourceName(ResIdx)
2366 << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2367 << "=" << NRCycle << "c\n");
2368 return true;
2369 }
2370 }
2371 }
2372 return false;
2373}
2374
2375// Find the unscheduled node in ReadySUs with the highest latency.
2378 SUnit *LateSU = nullptr;
2379 unsigned RemLatency = 0;
2380 for (SUnit *SU : ReadySUs) {
2381 unsigned L = getUnscheduledLatency(SU);
2382 if (L > RemLatency) {
2383 RemLatency = L;
2384 LateSU = SU;
2385 }
2386 }
2387 if (LateSU) {
2388 LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2389 << LateSU->NodeNum << ") " << RemLatency << "c\n");
2390 }
2391 return RemLatency;
2392}
2393
2394// Count resources in this zone and the remaining unscheduled
2395// instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2396// resource index, or zero if the zone is issue limited.
2398getOtherResourceCount(unsigned &OtherCritIdx) {
2399 OtherCritIdx = 0;
2401 return 0;
2402
2403 unsigned OtherCritCount = Rem->RemIssueCount
2404 + (RetiredMOps * SchedModel->getMicroOpFactor());
2405 LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2406 << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2407 for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2408 PIdx != PEnd; ++PIdx) {
2409 unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2410 if (OtherCount > OtherCritCount) {
2411 OtherCritCount = OtherCount;
2412 OtherCritIdx = PIdx;
2413 }
2414 }
2415 if (OtherCritIdx) {
2416 LLVM_DEBUG(
2417 dbgs() << " " << Available.getName() << " + Remain CritRes: "
2418 << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2419 << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2420 }
2421 return OtherCritCount;
2422}
2423
2424void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2425 unsigned Idx) {
2426 assert(SU->getInstr() && "Scheduled SUnit must have instr");
2427
2428#if LLVM_ENABLE_ABI_BREAKING_CHECKS
2429 // ReadyCycle was been bumped up to the CurrCycle when this node was
2430 // scheduled, but CurrCycle may have been eagerly advanced immediately after
2431 // scheduling, so may now be greater than ReadyCycle.
2432 if (ReadyCycle > CurrCycle)
2433 MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2434#endif
2435
2436 if (ReadyCycle < MinReadyCycle)
2437 MinReadyCycle = ReadyCycle;
2438
2439 // Check for interlocks first. For the purpose of other heuristics, an
2440 // instruction that cannot issue appears as if it's not in the ReadyQueue.
2441 bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2442 bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2444
2445 if (!HazardDetected) {
2446 Available.push(SU);
2447
2448 if (InPQueue)
2450 return;
2451 }
2452
2453 if (!InPQueue)
2454 Pending.push(SU);
2455}
2456
2457/// Move the boundary of scheduled code by one cycle.
2458void SchedBoundary::bumpCycle(unsigned NextCycle) {
2459 if (SchedModel->getMicroOpBufferSize() == 0) {
2460 assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2461 "MinReadyCycle uninitialized");
2462 if (MinReadyCycle > NextCycle)
2463 NextCycle = MinReadyCycle;
2464 }
2465 // Update the current micro-ops, which will issue in the next cycle.
2466 unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2467 CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2468
2469 // Decrement DependentLatency based on the next cycle.
2470 if ((NextCycle - CurrCycle) > DependentLatency)
2471 DependentLatency = 0;
2472 else
2473 DependentLatency -= (NextCycle - CurrCycle);
2474
2475 if (!HazardRec->isEnabled()) {
2476 // Bypass HazardRec virtual calls.
2477 CurrCycle = NextCycle;
2478 } else {
2479 // Bypass getHazardType calls in case of long latency.
2480 for (; CurrCycle != NextCycle; ++CurrCycle) {
2481 if (isTop())
2483 else
2485 }
2486 }
2487 CheckPending = true;
2488 IsResourceLimited =
2490 getScheduledLatency(), true);
2491
2492 LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2493 << '\n');
2494}
2495
2496void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2497 ExecutedResCounts[PIdx] += Count;
2498 if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2499 MaxExecutedResCount = ExecutedResCounts[PIdx];
2500}
2501
2502/// Add the given processor resource to this scheduled zone.
2503///
2504/// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2505/// during which this resource is consumed.
2506///
2507/// \return the next cycle at which the instruction may execute without
2508/// oversubscribing resources.
2509unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2510 unsigned Cycles, unsigned NextCycle) {
2511 unsigned Factor = SchedModel->getResourceFactor(PIdx);
2512 unsigned Count = Factor * Cycles;
2513 LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2514 << Cycles << "x" << Factor << "u\n");
2515
2516 // Update Executed resources counts.
2517 incExecutedResources(PIdx, Count);
2518 assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2519 Rem->RemainingCounts[PIdx] -= Count;
2520
2521 // Check if this resource exceeds the current critical resource. If so, it
2522 // becomes the critical resource.
2523 if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2524 ZoneCritResIdx = PIdx;
2525 LLVM_DEBUG(dbgs() << " *** Critical resource "
2526 << SchedModel->getResourceName(PIdx) << ": "
2528 << "c\n");
2529 }
2530 // For reserved resources, record the highest cycle using the resource.
2531 unsigned NextAvailable, InstanceIdx;
2532 std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2533 if (NextAvailable > CurrCycle) {
2534 LLVM_DEBUG(dbgs() << " Resource conflict: "
2535 << SchedModel->getResourceName(PIdx)
2536 << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2537 << " reserved until @" << NextAvailable << "\n");
2538 }
2539 return NextAvailable;
2540}
2541
2542/// Move the boundary of scheduled code by one SUnit.
2544 // Update the reservation table.
2545 if (HazardRec->isEnabled()) {
2546 if (!isTop() && SU->isCall) {
2547 // Calls are scheduled with their preceding instructions. For bottom-up
2548 // scheduling, clear the pipeline state before emitting.
2549 HazardRec->Reset();
2550 }
2552 // Scheduling an instruction may have made pending instructions available.
2553 CheckPending = true;
2554 }
2555 // checkHazard should prevent scheduling multiple instructions per cycle that
2556 // exceed the issue width.
2557 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2558 unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2559 assert(
2560 (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2561 "Cannot schedule this instruction's MicroOps in the current cycle.");
2562
2563 unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2564 LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2565
2566 unsigned NextCycle = CurrCycle;
2567 switch (SchedModel->getMicroOpBufferSize()) {
2568 case 0:
2569 assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2570 break;
2571 case 1:
2572 if (ReadyCycle > NextCycle) {
2573 NextCycle = ReadyCycle;
2574 LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2575 }
2576 break;
2577 default:
2578 // We don't currently model the OOO reorder buffer, so consider all
2579 // scheduled MOps to be "retired". We do loosely model in-order resource
2580 // latency. If this instruction uses an in-order resource, account for any
2581 // likely stall cycles.
2582 if (SU->isUnbuffered && ReadyCycle > NextCycle)
2583 NextCycle = ReadyCycle;
2584 break;
2585 }
2586 RetiredMOps += IncMOps;
2587
2588 // Update resource counts and critical resource.
2590 unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2591 assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2592 Rem->RemIssueCount -= DecRemIssue;
2593 if (ZoneCritResIdx) {
2594 // Scale scheduled micro-ops for comparing with the critical resource.
2595 unsigned ScaledMOps =
2596 RetiredMOps * SchedModel->getMicroOpFactor();
2597
2598 // If scaled micro-ops are now more than the previous critical resource by
2599 // a full cycle, then micro-ops issue becomes critical.
2600 if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2601 >= (int)SchedModel->getLatencyFactor()) {
2602 ZoneCritResIdx = 0;
2603 LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2604 << ScaledMOps / SchedModel->getLatencyFactor()
2605 << "c\n");
2606 }
2607 }
2610 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2611 unsigned RCycle =
2612 countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2613 if (RCycle > NextCycle)
2614 NextCycle = RCycle;
2615 }
2616 if (SU->hasReservedResource) {
2617 // For reserved resources, record the highest cycle using the resource.
2618 // For top-down scheduling, this is the cycle in which we schedule this
2619 // instruction plus the number of cycles the operations reserves the
2620 // resource. For bottom-up is it simply the instruction's cycle.
2623 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2624 unsigned PIdx = PI->ProcResourceIdx;
2625 if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2626 unsigned ReservedUntil, InstanceIdx;
2627 std::tie(ReservedUntil, InstanceIdx) =
2628 getNextResourceCycle(SC, PIdx, 0);
2629 if (isTop()) {
2630 ReservedCycles[InstanceIdx] =
2631 std::max(ReservedUntil, NextCycle + PI->Cycles);
2632 } else
2633 ReservedCycles[InstanceIdx] = NextCycle;
2634 }
2635 }
2636 }
2637 }
2638 // Update ExpectedLatency and DependentLatency.
2639 unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2640 unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2641 if (SU->getDepth() > TopLatency) {
2642 TopLatency = SU->getDepth();
2643 LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2644 << SU->NodeNum << ") " << TopLatency << "c\n");
2645 }
2646 if (SU->getHeight() > BotLatency) {
2647 BotLatency = SU->getHeight();
2648 LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2649 << SU->NodeNum << ") " << BotLatency << "c\n");
2650 }
2651 // If we stall for any reason, bump the cycle.
2652 if (NextCycle > CurrCycle)
2653 bumpCycle(NextCycle);
2654 else
2655 // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2656 // resource limited. If a stall occurred, bumpCycle does this.
2657 IsResourceLimited =
2659 getScheduledLatency(), true);
2660
2661 // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2662 // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2663 // one cycle. Since we commonly reach the max MOps here, opportunistically
2664 // bump the cycle to avoid uselessly checking everything in the readyQ.
2665 CurrMOps += IncMOps;
2666
2667 // Bump the cycle count for issue group constraints.
2668 // This must be done after NextCycle has been adjust for all other stalls.
2669 // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2670 // currCycle to X.
2671 if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2672 (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2673 LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2674 << " group\n");
2675 bumpCycle(++NextCycle);
2676 }
2677
2678 while (CurrMOps >= SchedModel->getIssueWidth()) {
2679 LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2680 << CurrCycle << '\n');
2681 bumpCycle(++NextCycle);
2682 }
2684}
2685
2686/// Release pending ready nodes in to the available queue. This makes them
2687/// visible to heuristics.
2689 // If the available queue is empty, it is safe to reset MinReadyCycle.
2690 if (Available.empty())
2691 MinReadyCycle = std::numeric_limits<unsigned>::max();
2692
2693 // Check to see if any of the pending instructions are ready to issue. If
2694 // so, add them to the available queue.
2695 for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2696 SUnit *SU = *(Pending.begin() + I);
2697 unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2698
2699 if (ReadyCycle < MinReadyCycle)
2700 MinReadyCycle = ReadyCycle;
2701
2702 if (Available.size() >= ReadyListLimit)
2703 break;
2704
2705 releaseNode(SU, ReadyCycle, true, I);
2706 if (E != Pending.size()) {
2707 --I;
2708 --E;
2709 }
2710 }
2711 CheckPending = false;
2712}
2713
2714/// Remove SU from the ready set for this boundary.
2716 if (Available.isInQueue(SU))
2718 else {
2719 assert(Pending.isInQueue(SU) && "bad ready count");
2721 }
2722}
2723
2724/// If this queue only has one ready candidate, return it. As a side effect,
2725/// defer any nodes that now hit a hazard, and advance the cycle until at least
2726/// one node is ready. If multiple instructions are ready, return NULL.
2728 if (CheckPending)
2730
2731 // Defer any ready instrs that now have a hazard.
2733 if (checkHazard(*I)) {
2734 Pending.push(*I);
2735 I = Available.remove(I);
2736 continue;
2737 }
2738 ++I;
2739 }
2740 for (unsigned i = 0; Available.empty(); ++i) {
2741// FIXME: Re-enable assert once PR20057 is resolved.
2742// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2743// "permanent hazard");
2744 (void)i;
2745 bumpCycle(CurrCycle + 1);
2747 }
2748
2751
2752 if (Available.size() == 1)
2753 return *Available.begin();
2754 return nullptr;
2755}
2756
2757#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2758
2759/// Dump the content of the \ref ReservedCycles vector for the
2760/// resources that are used in the basic block.
2761///
2764 return;
2765
2766 unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2767 unsigned StartIdx = 0;
2768
2769 for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) {
2770 const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
2771 std::string ResName = SchedModel->getResourceName(ResIdx);
2772 for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
2773 dbgs() << ResName << "(" << UnitIdx
2774 << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n";
2775 }
2776 StartIdx += NumUnits;
2777 }
2778}
2779
2780// This is useful information to dump after bumpNode.
2781// Note that the Queue contents are more useful before pickNodeFromQueue.
2783 unsigned ResFactor;
2784 unsigned ResCount;
2785 if (ZoneCritResIdx) {
2786 ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2787 ResCount = getResourceCount(ZoneCritResIdx);
2788 } else {
2789 ResFactor = SchedModel->getMicroOpFactor();
2790 ResCount = RetiredMOps * ResFactor;
2791 }
2792 unsigned LFactor = SchedModel->getLatencyFactor();
2793 dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2794 << " Retired: " << RetiredMOps;
2795 dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2796 dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2797 << ResCount / ResFactor << " "
2798 << SchedModel->getResourceName(ZoneCritResIdx)
2799 << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2800 << (IsResourceLimited ? " - Resource" : " - Latency")
2801 << " limited.\n";
2804}
2805#endif
2806
2807//===----------------------------------------------------------------------===//
2808// GenericScheduler - Generic implementation of MachineSchedStrategy.
2809//===----------------------------------------------------------------------===//
2810
2813 const TargetSchedModel *SchedModel) {
2815 return;
2816
2817 const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2820 PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2821 if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2822 ResDelta.CritResources += PI->Cycles;
2823 if (PI->ProcResourceIdx == Policy.DemandResIdx)
2824 ResDelta.DemandedResources += PI->Cycles;
2825 }
2826}
2827
2828/// Compute remaining latency. We need this both to determine whether the
2829/// overall schedule has become latency-limited and whether the instructions
2830/// outside this zone are resource or latency limited.
2831///
2832/// The "dependent" latency is updated incrementally during scheduling as the
2833/// max height/depth of scheduled nodes minus the cycles since it was
2834/// scheduled:
2835/// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2836///
2837/// The "independent" latency is the max ready queue depth:
2838/// ILat = max N.depth for N in Available|Pending
2839///
2840/// RemainingLatency is the greater of independent and dependent latency.
2841///
2842/// These computations are expensive, especially in DAGs with many edges, so
2843/// only do them if necessary.
2844static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2845 unsigned RemLatency = CurrZone.getDependentLatency();
2846 RemLatency = std::max(RemLatency,
2847 CurrZone.findMaxLatency(CurrZone.Available.elements()));
2848 RemLatency = std::max(RemLatency,
2849 CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2850 return RemLatency;
2851}
2852
2853/// Returns true if the current cycle plus remaning latency is greater than
2854/// the critical path in the scheduling region.
2855bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2856 SchedBoundary &CurrZone,
2857 bool ComputeRemLatency,
2858 unsigned &RemLatency) const {
2859 // The current cycle is already greater than the critical path, so we are
2860 // already latency limited and don't need to compute the remaining latency.
2861 if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2862 return true;
2863
2864 // If we haven't scheduled anything yet, then we aren't latency limited.
2865 if (CurrZone.getCurrCycle() == 0)
2866 return false;
2867
2868 if (ComputeRemLatency)
2869 RemLatency = computeRemLatency(CurrZone);
2870
2871 return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2872}
2873
2874/// Set the CandPolicy given a scheduling zone given the current resources and
2875/// latencies inside and outside the zone.
2877 SchedBoundary &CurrZone,
2878 SchedBoundary *OtherZone) {
2879 // Apply preemptive heuristics based on the total latency and resources
2880 // inside and outside this zone. Potential stalls should be considered before
2881 // following this policy.
2882
2883 // Compute the critical resource outside the zone.
2884 unsigned OtherCritIdx = 0;
2885 unsigned OtherCount =
2886 OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2887
2888 bool OtherResLimited = false;
2889 unsigned RemLatency = 0;
2890 bool RemLatencyComputed = false;
2891 if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2892 RemLatency = computeRemLatency(CurrZone);
2893 RemLatencyComputed = true;
2894 OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2895 OtherCount, RemLatency, false);
2896 }
2897
2898 // Schedule aggressively for latency in PostRA mode. We don't check for
2899 // acyclic latency during PostRA, and highly out-of-order processors will
2900 // skip PostRA scheduling.
2901 if (!OtherResLimited &&
2902 (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2903 RemLatency))) {
2904 Policy.ReduceLatency |= true;
2905 LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2906 << " RemainingLatency " << RemLatency << " + "
2907 << CurrZone.getCurrCycle() << "c > CritPath "
2908 << Rem.CriticalPath << "\n");
2909 }
2910 // If the same resource is limiting inside and outside the zone, do nothing.
2911 if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2912 return;
2913
2914 LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2915 dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2916 << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2917 } if (OtherResLimited) dbgs()
2918 << " RemainingLimit: "
2919 << SchedModel->getResourceName(OtherCritIdx) << "\n";
2920 if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2921 << " Latency limited both directions.\n");
2922
2923 if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2924 Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2925
2926 if (OtherResLimited)
2927 Policy.DemandResIdx = OtherCritIdx;
2928}
2929
2930#ifndef NDEBUG
2933 switch (Reason) {
2934 case NoCand: return "NOCAND ";
2935 case Only1: return "ONLY1 ";
2936 case PhysReg: return "PHYS-REG ";
2937 case RegExcess: return "REG-EXCESS";
2938 case RegCritical: return "REG-CRIT ";
2939 case Stall: return "STALL ";
2940 case Cluster: return "CLUSTER ";
2941 case Weak: return "WEAK ";
2942 case RegMax: return "REG-MAX ";
2943 case ResourceReduce: return "RES-REDUCE";
2944 case ResourceDemand: return "RES-DEMAND";
2945 case TopDepthReduce: return "TOP-DEPTH ";
2946 case TopPathReduce: return "TOP-PATH ";
2947 case BotHeightReduce:return "BOT-HEIGHT";
2948 case BotPathReduce: return "BOT-PATH ";
2949 case NextDefUse: return "DEF-USE ";
2950 case NodeOrder: return "ORDER ";
2951 };
2952 llvm_unreachable("Unknown reason!");
2953}
2954
2957 unsigned ResIdx = 0;
2958 unsigned Latency = 0;
2959 switch (Cand.Reason) {
2960 default:
2961 break;
2962 case RegExcess:
2963 P = Cand.RPDelta.Excess;
2964 break;
2965 case RegCritical:
2966 P = Cand.RPDelta.CriticalMax;
2967 break;
2968 case RegMax:
2969 P = Cand.RPDelta.CurrentMax;
2970 break;
2971 case ResourceReduce:
2972 ResIdx = Cand.Policy.ReduceResIdx;
2973 break;
2974 case ResourceDemand:
2975 ResIdx = Cand.Policy.DemandResIdx;
2976 break;
2977 case TopDepthReduce:
2978 Latency = Cand.SU->getDepth();
2979 break;
2980 case TopPathReduce:
2981 Latency = Cand.SU->getHeight();
2982 break;
2983 case BotHeightReduce:
2984 Latency = Cand.SU->getHeight();
2985 break;
2986 case BotPathReduce:
2987 Latency = Cand.SU->getDepth();
2988 break;
2989 }
2990 dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2991 if (P.isValid())
2992 dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2993 << ":" << P.getUnitInc() << " ";
2994 else
2995 dbgs() << " ";
2996 if (ResIdx)
2997 dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2998 else
2999 dbgs() << " ";
3000 if (Latency)
3001 dbgs() << " " << Latency << " cycles ";
3002 else
3003 dbgs() << " ";
3004 dbgs() << '\n';
3005}
3006#endif
3007
3008namespace llvm {
3009/// Return true if this heuristic determines order.
3010/// TODO: Consider refactor return type of these functions as integer or enum,
3011/// as we may need to differentiate whether TryCand is better than Cand.
3012bool tryLess(int TryVal, int CandVal,
3016 if (TryVal < CandVal) {
3017 TryCand.Reason = Reason;
3018 return true;
3019 }
3020 if (TryVal > CandVal) {
3021 if (Cand.Reason > Reason)
3022 Cand.Reason = Reason;
3023 return true;
3024 }
3025 return false;
3026}
3027
3028bool tryGreater(int TryVal, int CandVal,
3032 if (TryVal > CandVal) {
3033 TryCand.Reason = Reason;
3034 return true;
3035 }
3036 if (TryVal < CandVal) {
3037 if (Cand.Reason > Reason)
3038 Cand.Reason = Reason;
3039 return true;
3040 }
3041 return false;
3042}
3043
3046 SchedBoundary &Zone) {
3047 if (Zone.isTop()) {
3048 // Prefer the candidate with the lesser depth, but only if one of them has
3049 // depth greater than the total latency scheduled so far, otherwise either
3050 // of them could be scheduled now with no stall.
3051 if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
3052 Zone.getScheduledLatency()) {
3053 if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3055 return true;
3056 }
3057 if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3059 return true;
3060 } else {
3061 // Prefer the candidate with the lesser height, but only if one of them has
3062 // height greater than the total latency scheduled so far, otherwise either
3063 // of them could be scheduled now with no stall.
3064 if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
3065 Zone.getScheduledLatency()) {
3066 if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
3068 return true;
3069 }
3070 if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
3072 return true;
3073 }
3074 return false;
3075}
3076} // end namespace llvm
3077
3078static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
3079 LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
3080 << GenericSchedulerBase::getReasonStr(Reason) << '\n');
3081}
3082
3084 tracePick(Cand.Reason, Cand.AtTop);
3085}
3086
3088 assert(dag->hasVRegLiveness() &&
3089 "(PreRA)GenericScheduler needs vreg liveness");
3090 DAG = static_cast<ScheduleDAGMILive*>(dag);
3091 SchedModel = DAG->getSchedModel();
3092 TRI = DAG->TRI;
3093
3094 if (RegionPolicy.ComputeDFSResult)
3095 DAG->computeDFSResult();
3096
3097 Rem.init(DAG, SchedModel);
3098 Top.init(DAG, SchedModel, &Rem);
3099 Bot.init(DAG, SchedModel, &Rem);
3100
3101 // Initialize resource counts.
3102
3103 // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
3104 // are disabled, then these HazardRecs will be disabled.
3106 if (!Top.HazardRec) {
3107 Top.HazardRec =
3109 Itin, DAG);
3110 }
3111 if (!Bot.HazardRec) {
3112 Bot.HazardRec =
3114 Itin, DAG);
3115 }
3116 TopCand.SU = nullptr;
3117 BotCand.SU = nullptr;
3118}
3119
3120/// Initialize the per-region scheduling policy.
3123 unsigned NumRegionInstrs) {
3124 const MachineFunction &MF = *Begin->getMF();
3125 const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
3126
3127 // Avoid setting up the register pressure tracker for small regions to save
3128 // compile time. As a rough heuristic, only track pressure when the number of
3129 // schedulable instructions exceeds half the integer register file.
3130 RegionPolicy.ShouldTrackPressure = true;
3131 for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
3133 if (TLI->isTypeLegal(LegalIntVT)) {
3134 unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
3135 TLI->getRegClassFor(LegalIntVT));
3136 RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
3137 }
3138 }
3139
3140 // For generic targets, we default to bottom-up, because it's simpler and more
3141 // compile-time optimizations have been implemented in that direction.
3142 RegionPolicy.OnlyBottomUp = true;
3143
3144 // Allow the subtarget to override default policy.
3145 MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
3146
3147 // After subtarget overrides, apply command line options.
3148 if (!EnableRegPressure) {
3149 RegionPolicy.ShouldTrackPressure = false;
3150 RegionPolicy.ShouldTrackLaneMasks = false;
3151 }
3152
3153 // Check -misched-topdown/bottomup can force or unforce scheduling direction.
3154 // e.g. -misched-bottomup=false allows scheduling in both directions.
3156 "-misched-topdown incompatible with -misched-bottomup");
3157 if (ForceBottomUp.getNumOccurrences() > 0) {
3158 RegionPolicy.OnlyBottomUp = ForceBottomUp;
3159 if (RegionPolicy.OnlyBottomUp)
3160 RegionPolicy.OnlyTopDown = false;
3161 }
3162 if (ForceTopDown.getNumOccurrences() > 0) {
3163 RegionPolicy.OnlyTopDown = ForceTopDown;
3164 if (RegionPolicy.OnlyTopDown)
3165 RegionPolicy.OnlyBottomUp = false;
3166 }
3167}
3168
3170 // Cannot completely remove virtual function even in release mode.
3171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3172 dbgs() << "GenericScheduler RegionPolicy: "
3173 << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
3174 << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
3175 << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
3176 << "\n";
3177#endif
3178}
3179
3180/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
3181/// critical path by more cycles than it takes to drain the instruction buffer.
3182/// We estimate an upper bounds on in-flight instructions as:
3183///
3184/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
3185/// InFlightIterations = AcyclicPath / CyclesPerIteration
3186/// InFlightResources = InFlightIterations * LoopResources
3187///
3188/// TODO: Check execution resources in addition to IssueCount.
3191 return;
3192
3193 // Scaled number of cycles per loop iteration.
3194 unsigned IterCount =
3197 // Scaled acyclic critical path.
3198 unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3199 // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3200 unsigned InFlightCount =
3201 (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3202 unsigned BufferLimit =
3204
3205 Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3206
3207 LLVM_DEBUG(
3208 dbgs() << "IssueCycles="
3210 << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3211 << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3212 << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3213 << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3214 if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3215}
3216
3219
3220 // Some roots may not feed into ExitSU. Check all of them in case.
3221 for (const SUnit *SU : Bot.Available) {
3222 if (SU->getDepth() > Rem.CriticalPath)
3223 Rem.CriticalPath = SU->getDepth();
3224 }
3225 LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3227 errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3228 }
3229
3231 Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3232 checkAcyclicLatency();
3233 }
3234}
3235
3236namespace llvm {
3238 const PressureChange &CandP,
3242 const TargetRegisterInfo *TRI,
3243 const MachineFunction &MF) {
3244 // If one candidate decreases and the other increases, go with it.
3245 // Invalid candidates have UnitInc==0.
3246 if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3247 Reason)) {
3248 return true;
3249 }
3250 // Do not compare the magnitude of pressure changes between top and bottom
3251 // boundary.
3252 if (Cand.AtTop != TryCand.AtTop)
3253 return false;
3254
3255 // If both candidates affect the same set in the same boundary, go with the
3256 // smallest increase.
3257 unsigned TryPSet = TryP.getPSetOrMax();
3258 unsigned CandPSet = CandP.getPSetOrMax();
3259 if (TryPSet == CandPSet) {
3260 return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3261 Reason);
3262 }
3263
3264 int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3265 std::numeric_limits<int>::max();
3266
3267 int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3268 std::numeric_limits<int>::max();
3269
3270 // If the candidates are decreasing pressure, reverse priority.
3271 if (TryP.getUnitInc() < 0)
3272 std::swap(TryRank, CandRank);
3273 return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3274}
3275
3276unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3277 return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3278}
3279
3280/// Minimize physical register live ranges. Regalloc wants them adjacent to
3281/// their physreg def/use.
3282///
3283/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3284/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3285/// with the operation that produces or consumes the physreg. We'll do this when
3286/// regalloc has support for parallel copies.
3287int biasPhysReg(const SUnit *SU, bool isTop) {
3288 const MachineInstr *MI = SU->getInstr();
3289
3290 if (MI->isCopy()) {
3291 unsigned ScheduledOper = isTop ? 1 : 0;
3292 unsigned UnscheduledOper = isTop ? 0 : 1;
3293 // If we have already scheduled the physreg produce/consumer, immediately
3294 // schedule the copy.
3295 if (MI->getOperand(ScheduledOper).getReg().isPhysical())
3296 return 1;
3297 // If the physreg is at the boundary, defer it. Otherwise schedule it
3298 // immediately to free the dependent. We can hoist the copy later.
3299 bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3300 if (MI->getOperand(UnscheduledOper).getReg().isPhysical())
3301 return AtBoundary ? -1 : 1;
3302 }
3303
3304 if (MI->isMoveImmediate()) {
3305 // If we have a move immediate and all successors have been assigned, bias
3306 // towards scheduling this later. Make sure all register defs are to
3307 // physical registers.
3308 bool DoBias = true;
3309 for (const MachineOperand &Op : MI->defs()) {
3310 if (Op.isReg() && !Op.getReg().isPhysical()) {
3311 DoBias = false;
3312 break;
3313 }
3314 }
3315
3316 if (DoBias)
3317 return isTop ? -1 : 1;
3318 }
3319
3320 return 0;
3321}
3322} // end namespace llvm
3323
3325 bool AtTop,
3326 const RegPressureTracker &RPTracker,
3327 RegPressureTracker &TempTracker) {
3328 Cand.SU = SU;
3329 Cand.AtTop = AtTop;
3330 if (DAG->isTrackingPressure()) {
3331 if (AtTop) {
3332 TempTracker.getMaxDownwardPressureDelta(
3333 Cand.SU->getInstr(),
3334 Cand.RPDelta,
3335 DAG->getRegionCriticalPSets(),
3336 DAG->getRegPressure().MaxSetPressure);
3337 } else {
3338 if (VerifyScheduling) {
3339 TempTracker.getMaxUpwardPressureDelta(
3340 Cand.SU->getInstr(),
3341 &DAG->getPressureDiff(Cand.SU),
3342 Cand.RPDelta,
3343 DAG->getRegionCriticalPSets(),
3344 DAG->getRegPressure().MaxSetPressure);
3345 } else {
3346 RPTracker.getUpwardPressureDelta(
3347 Cand.SU->getInstr(),
3348 DAG->getPressureDiff(Cand.SU),
3349 Cand.RPDelta,
3350 DAG->getRegionCriticalPSets(),
3351 DAG->getRegPressure().MaxSetPressure);
3352 }
3353 }
3354 }
3355 LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3356 << " Try SU(" << Cand.SU->NodeNum << ") "
3358 << Cand.RPDelta.Excess.getUnitInc() << "\n");
3359}
3360
3361/// Apply a set of heuristics to a new candidate. Heuristics are currently
3362/// hierarchical. This may be more efficient than a graduated cost model because
3363/// we don't need to evaluate all aspects of the model for each node in the
3364/// queue. But it's really done to make the heuristics easier to debug and
3365/// statistically analyze.
3366///
3367/// \param Cand provides the policy and current best candidate.
3368/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3369/// \param Zone describes the scheduled zone that we are extending, or nullptr
3370/// if Cand is from a different zone than TryCand.
3371/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3373 SchedCandidate &TryCand,
3374 SchedBoundary *Zone) const {
3375 // Initialize the candidate if needed.
3376 if (!Cand.isValid()) {
3377 TryCand.Reason = NodeOrder;
3378 return true;
3379 }
3380
3381 // Bias PhysReg Defs and copies to their uses and defined respectively.
3382 if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3383 biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3384 return TryCand.Reason != NoCand;
3385
3386 // Avoid exceeding the target's limit.
3387 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3388 Cand.RPDelta.Excess,
3389 TryCand, Cand, RegExcess, TRI,
3390 DAG->MF))
3391 return TryCand.Reason != NoCand;
3392
3393 // Avoid increasing the max critical pressure in the scheduled region.
3394 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3395 Cand.RPDelta.CriticalMax,
3396 TryCand, Cand, RegCritical, TRI,
3397 DAG->MF))
3398 return TryCand.Reason != NoCand;
3399
3400 // We only compare a subset of features when comparing nodes between
3401 // Top and Bottom boundary. Some properties are simply incomparable, in many
3402 // other instances we should only override the other boundary if something
3403 // is a clear good pick on one boundary. Skip heuristics that are more
3404 // "tie-breaking" in nature.
3405 bool SameBoundary = Zone != nullptr;
3406 if (SameBoundary) {
3407 // For loops that are acyclic path limited, aggressively schedule for
3408 // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3409 // heuristics to take precedence.
3410 if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3411 tryLatency(TryCand, Cand, *Zone))
3412 return TryCand.Reason != NoCand;
3413
3414 // Prioritize instructions that read unbuffered resources by stall cycles.
3415 if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3416 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3417 return TryCand.Reason != NoCand;
3418 }
3419
3420 // Keep clustered nodes together to encourage downstream peephole
3421 // optimizations which may reduce resource requirements.
3422 //
3423 // This is a best effort to set things up for a post-RA pass. Optimizations
3424 // like generating loads of multiple registers should ideally be done within
3425 // the scheduler pass by combining the loads during DAG postprocessing.
3426 const SUnit *CandNextClusterSU =
3427 Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3428 const SUnit *TryCandNextClusterSU =
3429 TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3430 if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3431 Cand.SU == CandNextClusterSU,
3432 TryCand, Cand, Cluster))
3433 return TryCand.Reason != NoCand;
3434
3435 if (SameBoundary) {
3436 // Weak edges are for clustering and other constraints.
3437 if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3438 getWeakLeft(Cand.SU, Cand.AtTop),
3439 TryCand, Cand, Weak))
3440 return TryCand.Reason != NoCand;
3441 }
3442
3443 // Avoid increasing the max pressure of the entire region.
3444 if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3445 Cand.RPDelta.CurrentMax,
3446 TryCand, Cand, RegMax, TRI,
3447 DAG->MF))
3448 return TryCand.Reason != NoCand;
3449
3450 if (SameBoundary) {
3451 // Avoid critical resource consumption and balance the schedule.
3452 TryCand.initResourceDelta(DAG, SchedModel);
3454 TryCand, Cand, ResourceReduce))
3455 return TryCand.Reason != NoCand;
3458 TryCand, Cand, ResourceDemand))
3459 return TryCand.Reason != NoCand;
3460
3461 // Avoid serializing long latency dependence chains.
3462 // For acyclic path limited loops, latency was already checked above.
3463 if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3464 !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3465 return TryCand.Reason != NoCand;
3466
3467 // Fall through to original instruction order.
3468 if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3469 || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3470 TryCand.Reason = NodeOrder;
3471 return true;
3472 }
3473 }
3474
3475 return false;
3476}
3477
3478/// Pick the best candidate from the queue.
3479///
3480/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3481/// DAG building. To adjust for the current scheduling location we need to
3482/// maintain the number of vreg uses remaining to be top-scheduled.
3484 const CandPolicy &ZonePolicy,
3485 const RegPressureTracker &RPTracker,
3486 SchedCandidate &Cand) {
3487 // getMaxPressureDelta temporarily modifies the tracker.
3488 RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3489
3490 ReadyQueue &Q = Zone.Available;
3491 for (SUnit *SU : Q) {
3492
3493 SchedCandidate TryCand(ZonePolicy);
3494 initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3495 // Pass SchedBoundary only when comparing nodes from the same boundary.
3496 SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3497 if (tryCandidate(Cand, TryCand, ZoneArg)) {
3498 // Initialize resource delta if needed in case future heuristics query it.
3499 if (TryCand.ResDelta == SchedResourceDelta())
3500 TryCand.initResourceDelta(DAG, SchedModel);
3501 Cand.setBest(TryCand);
3503 }
3504 }
3505}
3506
3507/// Pick the best candidate node from either the top or bottom queue.
3509 // Schedule as far as possible in the direction of no choice. This is most
3510 // efficient, but also provides the best heuristics for CriticalPSets.
3511 if (SUnit *SU = Bot.pickOnlyChoice()) {
3512 IsTopNode = false;
3513 tracePick(Only1, false);
3514 return SU;
3515 }
3516 if (SUnit *SU = Top.pickOnlyChoice()) {
3517 IsTopNode = true;
3518 tracePick(Only1, true);
3519 return SU;
3520 }
3521 // Set the bottom-up policy based on the state of the current bottom zone and
3522 // the instructions outside the zone, including the top zone.
3523 CandPolicy BotPolicy;
3524 setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3525 // Set the top-down policy based on the state of the current top zone and
3526 // the instructions outside the zone, including the bottom zone.
3527 CandPolicy TopPolicy;
3528 setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3529
3530 // See if BotCand is still valid (because we previously scheduled from Top).
3531 LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3532 if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3533 BotCand.Policy != BotPolicy) {
3534 BotCand.reset(CandPolicy());
3535 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3536 assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3537 } else {
3538 LLVM_DEBUG(traceCandidate(BotCand));
3539#ifndef NDEBUG
3540 if (VerifyScheduling) {
3541 SchedCandidate TCand;
3542 TCand.reset(CandPolicy());
3543 pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3544 assert(TCand.SU == BotCand.SU &&
3545 "Last pick result should correspond to re-picking right now");
3546 }
3547#endif
3548 }
3549
3550 // Check if the top Q has a better candidate.
3551 LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3552 if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3553 TopCand.Policy != TopPolicy) {
3554 TopCand.reset(CandPolicy());
3555 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3556 assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3557 } else {
3558 LLVM_DEBUG(traceCandidate(TopCand));
3559#ifndef NDEBUG
3560 if (VerifyScheduling) {
3561 SchedCandidate TCand;
3562 TCand.reset(CandPolicy());
3563 pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3564 assert(TCand.SU == TopCand.SU &&
3565 "Last pick result should correspond to re-picking right now");
3566 }
3567#endif
3568 }
3569
3570 // Pick best from BotCand and TopCand.
3571 assert(BotCand.isValid());
3572 assert(TopCand.isValid());
3573 SchedCandidate Cand = BotCand;
3574 TopCand.Reason = NoCand;
3575 if (tryCandidate(Cand, TopCand, nullptr)) {
3576 Cand.setBest(TopCand);
3578 }
3579
3580 IsTopNode = Cand.AtTop;
3581 tracePick(Cand);
3582 return Cand.SU;
3583}
3584
3585/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3587 if (DAG->top() == DAG->bottom()) {
3588 assert(Top.Available.empty() && Top.Pending.empty() &&
3589 Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3590 return nullptr;
3591 }
3592 SUnit *SU;
3593 do {
3594 if (RegionPolicy.OnlyTopDown) {
3595 SU = Top.pickOnlyChoice();
3596 if (!SU) {
3597 CandPolicy NoPolicy;
3598 TopCand.reset(NoPolicy);
3599 pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3600 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3601 tracePick(TopCand);
3602 SU = TopCand.SU;
3603 }
3604 IsTopNode = true;
3605 } else if (RegionPolicy.OnlyBottomUp) {
3606 SU = Bot.pickOnlyChoice();
3607 if (!SU) {
3608 CandPolicy NoPolicy;
3609 BotCand.reset(NoPolicy);
3610 pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3611 assert(BotCand.Reason != NoCand && "failed to find a candidate");
3612 tracePick(BotCand);
3613 SU = BotCand.SU;
3614 }
3615 IsTopNode = false;
3616 } else {
3617 SU = pickNodeBidirectional(IsTopNode);
3618 }
3619 } while (SU->isScheduled);
3620
3621 if (SU->isTopReady())
3622 Top.removeReady(SU);
3623 if (SU->isBottomReady())
3624 Bot.removeReady(SU);
3625
3626 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3627 << *SU->getInstr());
3628 return SU;
3629}
3630
3632 MachineBasicBlock::iterator InsertPos = SU->getInstr();
3633 if (!isTop)
3634 ++InsertPos;
3635 SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3636
3637 // Find already scheduled copies with a single physreg dependence and move
3638 // them just above the scheduled instruction.
3639 for (SDep &Dep : Deps) {
3640 if (Dep.getKind() != SDep::Data ||
3641 !Register::isPhysicalRegister(Dep.getReg()))
3642 continue;
3643 SUnit *DepSU = Dep.getSUnit();
3644 if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3645 continue;
3646 MachineInstr *Copy = DepSU->getInstr();
3647 if (!Copy->isCopy() && !Copy->isMoveImmediate())
3648 continue;
3649 LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3650 DAG->dumpNode(*Dep.getSUnit()));
3651 DAG->moveInstruction(Copy, InsertPos);
3652 }
3653}
3654
3655/// Update the scheduler's state after scheduling a node. This is the same node
3656/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3657/// update it's state based on the current cycle before MachineSchedStrategy
3658/// does.
3659///
3660/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3661/// them here. See comments in biasPhysReg.
3662void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3663 if (IsTopNode) {
3664 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3665 Top.bumpNode(SU);
3666 if (SU->hasPhysRegUses)
3667 reschedulePhysReg(SU, true);
3668 } else {
3669 SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3670 Bot.bumpNode(SU);
3671 if (SU->hasPhysRegDefs)
3672 reschedulePhysReg(SU, false);
3673 }
3674}
3675
3676/// Create the standard converging machine scheduler. This will be used as the
3677/// default scheduler if the target does not set a default.
3679 ScheduleDAGMILive *DAG =
3680 new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3681 // Register DAG post-processors.
3682 //
3683 // FIXME: extend the mutation API to allow earlier mutations to instantiate
3684 // data and pass it to later mutations. Have a single mutation that gathers
3685 // the interesting nodes in one pass.
3687 return DAG;
3688}
3689
3691 return createGenericSchedLive(C);
3692}
3693
3695GenericSchedRegistry("converge", "Standard converging scheduler.",
3697
3698//===----------------------------------------------------------------------===//
3699// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3700//===----------------------------------------------------------------------===//
3701
3703 DAG = Dag;
3704 SchedModel = DAG->getSchedModel();
3705 TRI = DAG->TRI;
3706
3707 Rem.init(DAG, SchedModel);
3708 Top.init(DAG, SchedModel, &Rem);
3709 BotRoots.clear();
3710
3711 // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3712 // or are disabled, then these HazardRecs will be disabled.
3714 if (!Top.HazardRec) {
3715 Top.HazardRec =
3717 Itin, DAG);
3718 }
3719}
3720
3723
3724 // Some roots may not feed into ExitSU. Check all of them in case.
3725 for (const SUnit *SU : BotRoots) {
3726 if (SU->getDepth() > Rem.CriticalPath)
3727 Rem.CriticalPath = SU->getDepth();
3728 }
3729 LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3731 errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3732 }
3733}
3734
3735/// Apply a set of heuristics to a new candidate for PostRA scheduling.
3736///
3737/// \param Cand provides the policy and current best candidate.
3738/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3739/// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3741 SchedCandidate &TryCand) {
3742 // Initialize the candidate if needed.
3743 if (!Cand.isValid()) {
3744 TryCand.Reason = NodeOrder;
3745 return true;
3746 }
3747
3748 // Prioritize instructions that read unbuffered resources by stall cycles.
3749 if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3750 Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3751 return TryCand.Reason != NoCand;
3752
3753 // Keep clustered nodes together.
3754 if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3755 Cand.SU == DAG->getNextClusterSucc(),
3756 TryCand, Cand, Cluster))
3757 return TryCand.Reason != NoCand;
3758
3759 // Avoid critical resource consumption and balance the schedule.
3761 TryCand, Cand, ResourceReduce))
3762 return TryCand.Reason != NoCand;
3765 TryCand, Cand, ResourceDemand))
3766 return TryCand.Reason != NoCand;
3767
3768 // Avoid serializing long latency dependence chains.
3769 if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3770 return TryCand.Reason != NoCand;
3771 }
3772
3773 // Fall through to original instruction order.
3774 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3775 TryCand.Reason = NodeOrder;
3776 return true;
3777 }
3778
3779 return false;
3780}
3781
3783 ReadyQueue &Q = Top.Available;
3784 for (SUnit *SU : Q) {
3785 SchedCandidate TryCand(Cand.Policy);
3786 TryCand.SU = SU;
3787 TryCand.AtTop = true;
3788 TryCand.initResourceDelta(DAG, SchedModel);
3789 if (tryCandidate(Cand, TryCand)) {
3790 Cand.setBest(TryCand);
3792 }
3793 }
3794}
3795
3796/// Pick the next node to schedule.
3798 if (DAG->top() == DAG->bottom()) {
3799 assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3800 return nullptr;
3801 }
3802 SUnit *SU;
3803 do {
3804 SU = Top.pickOnlyChoice();
3805 if (SU) {
3806 tracePick(Only1, true);
3807 } else {
3808 CandPolicy NoPolicy;
3809 SchedCandidate TopCand(NoPolicy);
3810 // Set the top-down policy based on the state of the current top zone and
3811 // the instructions outside the zone, including the bottom zone.
3812 setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3813 pickNodeFromQueue(TopCand);
3814 assert(TopCand.Reason != NoCand && "failed to find a candidate");
3815 tracePick(TopCand);
3816 SU = TopCand.SU;
3817 }
3818 } while (SU->isScheduled);
3819
3820 IsTopNode = true;
3821 Top.removeReady(SU);
3822
3823 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3824 << *SU->getInstr());
3825 return SU;
3826}
3827
3828/// Called after ScheduleDAGMI has scheduled an instruction and updated
3829/// scheduled/remaining flags in the DAG nodes.
3830void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3831 SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3832 Top.bumpNode(SU);
3833}
3834
3836 return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3837 /*RemoveKillFlags=*/true);
3838}
3839
3840//===----------------------------------------------------------------------===//
3841// ILP Scheduler. Currently for experimental analysis of heuristics.
3842//===----------------------------------------------------------------------===//
3843
3844namespace {
3845
3846/// Order nodes by the ILP metric.
3847struct ILPOrder {
3848 const SchedDFSResult *DFSResult = nullptr;
3849 const BitVector *ScheduledTrees = nullptr;
3850 bool MaximizeILP;
3851
3852 ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3853
3854 /// Apply a less-than relation on node priority.
3855 ///
3856 /// (Return true if A comes after B in the Q.)
3857 bool operator()(const SUnit *A, const SUnit *B) const {
3858 unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3859 unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3860 if (SchedTreeA != SchedTreeB) {
3861 // Unscheduled trees have lower priority.
3862 if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3863 return ScheduledTrees->test(SchedTreeB);
3864
3865 // Trees with shallower connections have have lower priority.
3866 if (DFSResult->getSubtreeLevel(SchedTreeA)
3867 != DFSResult->getSubtreeLevel(SchedTreeB)) {
3868 return DFSResult->getSubtreeLevel(SchedTreeA)
3869 < DFSResult->getSubtreeLevel(SchedTreeB);
3870 }
3871 }
3872 if (MaximizeILP)
3873 return DFSResult->getILP(A) < DFSResult->getILP(B);
3874 else
3875 return DFSResult->getILP(A) > DFSResult->getILP(B);
3876 }
3877};
3878
3879/// Schedule based on the ILP metric.
3880class ILPScheduler : public MachineSchedStrategy {
3881 ScheduleDAGMILive *DAG = nullptr;
3882 ILPOrder Cmp;
3883
3884 std::vector<SUnit*> ReadyQ;
3885
3886public:
3887 ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3888
3889 void initialize(ScheduleDAGMI *dag) override {
3890 assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3891 DAG = static_cast<ScheduleDAGMILive*>(dag);
3892 DAG->computeDFSResult();
3893 Cmp.DFSResult = DAG->getDFSResult();
3894 Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3895 ReadyQ.clear();
3896 }
3897
3898 void registerRoots() override {
3899 // Restore the heap in ReadyQ with the updated DFS results.
3900 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3901 }
3902
3903 /// Implement MachineSchedStrategy interface.
3904 /// -----------------------------------------
3905
3906 /// Callback to select the highest priority node from the ready Q.
3907 SUnit *pickNode(bool &IsTopNode) override {
3908 if (ReadyQ.empty()) return nullptr;
3909 std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3910 SUnit *SU = ReadyQ.back();
3911 ReadyQ.pop_back();
3912 IsTopNode = false;
3913 LLVM_DEBUG(dbgs() << "Pick node "
3914 << "SU(" << SU->NodeNum << ") "
3915 << " ILP: " << DAG->getDFSResult()->getILP(SU)
3916 << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3917 << " @"
3918 << DAG->getDFSResult()->getSubtreeLevel(
3919 DAG->getDFSResult()->getSubtreeID(SU))
3920 << '\n'
3921 << "Scheduling " << *SU->getInstr());
3922 return SU;
3923 }
3924
3925 /// Scheduler callback to notify that a new subtree is scheduled.
3926 void scheduleTree(unsigned SubtreeID) override {
3927 std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3928 }
3929
3930 /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3931 /// DFSResults, and resort the priority Q.
3932 void schedNode(SUnit *SU, bool IsTopNode) override {
3933 assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3934 }
3935
3936 void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3937
3938 void releaseBottomNode(SUnit *SU) override {
3939 ReadyQ.push_back(SU);
3940 std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3941 }
3942};
3943
3944} // end anonymous namespace
3945
3947 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3948}
3950 return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3951}
3952
3954 "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3956 "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3957
3958//===----------------------------------------------------------------------===//
3959// Machine Instruction Shuffler for Correctness Testing
3960//===----------------------------------------------------------------------===//
3961
3962#ifndef NDEBUG
3963namespace {
3964
3965/// Apply a less-than relation on the node order, which corresponds to the
3966/// instruction order prior to scheduling. IsReverse implements greater-than.
3967template<bool IsReverse>
3968struct SUnitOrder {
3969 bool operator()(SUnit *A, SUnit *B) const {
3970 if (IsReverse)
3971 return A->NodeNum > B->NodeNum;
3972 else
3973 return A->NodeNum < B->NodeNum;
3974 }
3975};
3976
3977/// Reorder instructions as much as possible.
3978class InstructionShuffler : public MachineSchedStrategy {
3979 bool IsAlternating;
3980 bool IsTopDown;
3981
3982 // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3983 // gives nodes with a higher number higher priority causing the latest
3984 // instructions to be scheduled first.
3986 TopQ;
3987
3988 // When scheduling bottom-up, use greater-than as the queue priority.
3990 BottomQ;
3991
3992public:
3993 InstructionShuffler(bool alternate, bool topdown)
3994 : IsAlternating(alternate), IsTopDown(topdown) {}
3995
3996 void initialize(ScheduleDAGMI*) override {
3997 TopQ.clear();
3998 BottomQ.clear();
3999 }
4000
4001 /// Implement MachineSchedStrategy interface.
4002 /// -----------------------------------------
4003
4004 SUnit *pickNode(bool &IsTopNode) override {
4005 SUnit *SU;
4006 if (IsTopDown) {
4007 do {
4008 if (TopQ.empty()) return nullptr;
4009 SU = TopQ.top();
4010 TopQ.pop();
4011 } while (SU->isScheduled);
4012 IsTopNode = true;
4013 } else {
4014 do {
4015 if (BottomQ.empty()) return nullptr;
4016 SU = BottomQ.top();
4017 BottomQ.pop();
4018 } while (SU->isScheduled);
4019 IsTopNode = false;
4020 }
4021 if (IsAlternating)
4022 IsTopDown = !IsTopDown;
4023 return SU;
4024 }
4025
4026 void schedNode(SUnit *SU, bool IsTopNode) override {}
4027
4028 void releaseTopNode(SUnit *SU) override {
4029 TopQ.push(SU);
4030 }
4031 void releaseBottomNode(SUnit *SU) override {
4032 BottomQ.push(SU);
4033 }
4034};
4035
4036} // end anonymous namespace
4037
4039 bool Alternate = !ForceTopDown && !ForceBottomUp;
4040 bool TopDown = !ForceBottomUp;
4041 assert((TopDown || !ForceTopDown) &&
4042 "-misched-topdown incompatible with -misched-bottomup");
4043 return new ScheduleDAGMILive(
4044 C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
4045}
4046
4048 "shuffle", "Shuffle machine instructions alternating directions",
4050#endif // !NDEBUG
4051
4052//===----------------------------------------------------------------------===//
4053// GraphWriter support for ScheduleDAGMILive.
4054//===----------------------------------------------------------------------===//
4055
4056#ifndef NDEBUG
4057namespace llvm {
4058
4059template<> struct GraphTraits<
4061
4062template<>
4065
4066 static std::string getGraphName(const ScheduleDAG *G) {
4067 return std::string(G->MF.getName());
4068 }
4069
4071 return true;
4072 }
4073
4074 static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
4075 if (ViewMISchedCutoff == 0)
4076 return false;
4077 return (Node->Preds.size() > ViewMISchedCutoff
4078 || Node->Succs.size() > ViewMISchedCutoff);
4079 }
4080
4081 /// If you want to override the dot attributes printed for a particular
4082 /// edge, override this method.
4083 static std::string getEdgeAttributes(const SUnit *Node,
4084 SUnitIterator EI,
4085 const ScheduleDAG *Graph) {
4086 if (EI.isArtificialDep())
4087 return "color=cyan,style=dashed";
4088 if (EI.isCtrlDep())
4089 return "color=blue,style=dashed";
4090 return "";
4091 }
4092
4093 static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
4094 std::string Str;
4095 raw_string_ostream SS(Str);
4096 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4097 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4098 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4099 SS << "SU:" << SU->NodeNum;
4100 if (DFS)
4101 SS << " I:" << DFS->getNumInstrs(SU);
4102 return SS.str();
4103 }
4104
4105 static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
4106 return G->getGraphNodeLabel(SU);
4107 }
4108
4109 static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
4110 std::string Str("shape=Mrecord");
4111 const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
4112 const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
4113 static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
4114 if (DFS) {
4115 Str += ",style=filled,fillcolor=\"#";
4116 Str += DOT::getColorString(DFS->getSubtreeID(N));
4117 Str += '"';
4118 }
4119 return Str;
4120 }
4121};
4122
4123} // end namespace llvm
4124#endif // NDEBUG
4125
4126/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
4127/// rendered using 'dot'.
4128void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
4129#ifndef NDEBUG
4130 ViewGraph(this, Name, false, Title);
4131#else
4132 errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
4133 << "systems with Graphviz or gv!\n";
4134#endif // NDEBUG
4135}
4136
4137/// Out-of-line implementation with no arguments is handy for gdb.
4139 viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
4140}
MachineInstrBuilder MachineInstrBuilder & DefMI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
basic Basic Alias true
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
std::string Name
expand large div rem
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A common definition of LaneBitmask for use in TableGen and CodeGen.
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
Machine Instruction Scheduler
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
static const unsigned MinSubtreeSize
static const unsigned InvalidCycle
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConvergingSched)
static cl::opt< unsigned > HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden, cl::desc("Set width of the columns with " "the resources and schedule units"), cl::init(19))
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
static cl::opt< unsigned > FastClusterThreshold("fast-cluster-threshold", cl::Hidden, cl::desc("The threshold for fast cluster"), cl::init(1000))
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line.
static cl::opt< unsigned > ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden, cl::desc("Set width of the columns showing resource booking."), cl::init(5))
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
static const char * scheduleTableLegend
static ScheduleDAGInstrs * createConvergingSched(MachineSchedContext *C)
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
static cl::opt< bool > MISchedDumpScheduleTrace("misched-dump-schedule-trace", cl::Hidden, cl::init(false), cl::desc("Dump resource usage at schedule boundary."))
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned const TargetRegisterInfo * TRI
#define P(N)
if(VerifyEach)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PriorityQueue class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isSimple(Instruction *I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const X86InstrFMA3Group Groups[]
Value * RHS
Class recording the (high level) value of a variable.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
Definition: APInt.h:75
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
reverse_iterator rend() const
Definition: ArrayRef.h:155
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
reverse_iterator rbegin() const
Definition: ArrayRef.h:154
bool test(unsigned Idx) const
Definition: BitVector.h:454
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:334
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:328
BitVector & set()
Definition: BitVector.h:344
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
Register getReg() const
void traceCandidate(const SchedCandidate &Cand)
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
const TargetSchedModel * SchedModel
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
const MachineSchedContext * Context
CandReason
Represent the type of SchedCandidate found within a single queue.
const TargetRegisterInfo * TRI
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
void dumpPolicy() const override
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
void reschedulePhysReg(SUnit *SU, bool isTop)
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler's state after scheduling a node.
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Itinerary data supplied by a subtarget to be used by a target.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
LiveInterval & getInterval(Register Reg)
Result of a LiveRange query.
Definition: LiveInterval.h:90
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
iterator end()
Definition: LiveInterval.h:216
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx,...
Definition: LiveInterval.h:429
iterator begin()
Definition: LiveInterval.h:215
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:392
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().