LLVM  9.0.0svn
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/CodeGen/Passes.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/MC/LaneBitmask.h"
52 #include "llvm/Pass.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <limits>
65 #include <memory>
66 #include <string>
67 #include <tuple>
68 #include <utility>
69 #include <vector>
70 
71 using namespace llvm;
72 
73 #define DEBUG_TYPE "machine-scheduler"
74 
75 namespace llvm {
76 
77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
78  cl::desc("Force top-down list scheduling"));
79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
80  cl::desc("Force bottom-up list scheduling"));
81 cl::opt<bool>
82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
83  cl::desc("Print critical path length to stdout"));
84 
85 } // end namespace llvm
86 
87 #ifndef NDEBUG
88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
89  cl::desc("Pop up a window to show MISched dags after they are processed"));
90 
91 /// In some situations a few uninteresting nodes depend on nearly all other
92 /// nodes in the graph; provide a cutoff to hide them.
93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
94  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
95 
96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
97  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
98 
99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
100  cl::desc("Only schedule this function"));
101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
102  cl::desc("Only schedule this MBB#"));
103 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
104  cl::desc("Print schedule DAGs"));
105 #else
106 static const bool ViewMISchedDAGs = false;
107 static const bool PrintDAGs = false;
108 #endif // NDEBUG
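// Example usage of the flags above (assertions-enabled builds only; "foo" is a
// placeholder function name):
//
//   llc -enable-misched -misched-only-func=foo -misched-print-dags \
//       -misched-cutoff=20 in.ll
//
// restricts scheduling to one function, prints each region's DAG, and stops
// scheduling after the first 20 instructions.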
109 
110 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
111 /// size of the ready lists.
112 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
113  cl::desc("Limit ready list to N instructions"), cl::init(256));
114 
115 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
116  cl::desc("Enable register pressure scheduling."), cl::init(true));
117 
118 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
119  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
120 
121 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
122  cl::desc("Enable memop clustering."),
123  cl::init(true));
124 
125 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
126  cl::desc("Verify machine instrs before and after machine scheduling"));
127 
128 // DAG subtrees must have at least this many nodes.
129 static const unsigned MinSubtreeSize = 8;
130 
131 // Pin the vtables to this file.
132 void MachineSchedStrategy::anchor() {}
133 
134 void ScheduleDAGMutation::anchor() {}
135 
136 //===----------------------------------------------------------------------===//
137 // Machine Instruction Scheduling Pass and Registry
138 //===----------------------------------------------------------------------===//
139 
140 MachineSchedContext::MachineSchedContext() {
141  RegClassInfo = new RegisterClassInfo();
142 }
143 
144 MachineSchedContext::~MachineSchedContext() {
145  delete RegClassInfo;
146 }
147 
148 namespace {
149 
150 /// Base class for a machine scheduler class that can run at any point.
151 class MachineSchedulerBase : public MachineSchedContext,
152  public MachineFunctionPass {
153 public:
154  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
155 
156  void print(raw_ostream &O, const Module* = nullptr) const override;
157 
158 protected:
159  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
160 };
161 
162 /// MachineScheduler runs after coalescing and before register allocation.
163 class MachineScheduler : public MachineSchedulerBase {
164 public:
165  MachineScheduler();
166 
167  void getAnalysisUsage(AnalysisUsage &AU) const override;
168 
169  bool runOnMachineFunction(MachineFunction&) override;
170 
171  static char ID; // Class identification, replacement for typeinfo
172 
173 protected:
174  ScheduleDAGInstrs *createMachineScheduler();
175 };
176 
177 /// PostMachineScheduler runs shortly before code emission.
178 class PostMachineScheduler : public MachineSchedulerBase {
179 public:
180  PostMachineScheduler();
181 
182  void getAnalysisUsage(AnalysisUsage &AU) const override;
183 
184  bool runOnMachineFunction(MachineFunction&) override;
185 
186  static char ID; // Class identification, replacement for typeinfo
187 
188 protected:
189  ScheduleDAGInstrs *createPostMachineScheduler();
190 };
191 
192 } // end anonymous namespace
193 
194 char MachineScheduler::ID = 0;
195 
196 char &llvm::MachineSchedulerID = MachineScheduler::ID;
197 
198 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
199  "Machine Instruction Scheduler", false, false)
204 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
205  "Machine Instruction Scheduler", false, false)
206 
207 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
209 }
210 
211 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
212  AU.setPreservesCFG();
217  AU.addRequired<SlotIndexes>();
222 }
223 
224 char PostMachineScheduler::ID = 0;
225 
226 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
227 
228 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
229  "PostRA Machine Instruction Scheduler", false, false)
230 
231 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
233 }
234 
235 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
236  AU.setPreservesCFG();
241 }
242 
245 
246 /// A dummy default scheduler factory indicates whether the scheduler
247 /// is overridden on the command line.
248 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
249  return nullptr;
250 }
251 
252 /// MachineSchedOpt allows command line selection of the scheduler.
253 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
254  RegisterPassParser<MachineSchedRegistry>>
255 MachineSchedOpt("misched",
256  cl::init(&useDefaultMachineSched), cl::Hidden,
257  cl::desc("Machine instruction scheduler to use"));
258 
259 static MachineSchedRegistry
260 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
261  useDefaultMachineSched);
262 
263 static cl::opt<bool> EnableMachineSched(
264  "enable-misched",
265  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
266  cl::Hidden);
267 
268 static cl::opt<bool> EnablePostRAMachineSched(
269  "enable-post-misched",
270  cl::desc("Enable the post-ra machine instruction scheduling pass."),
271  cl::init(true), cl::Hidden);
272 
273 /// Decrement this iterator until reaching the top or a non-debug instr.
274 static MachineBasicBlock::const_iterator
275 priorNonDebug(MachineBasicBlock::const_iterator I,
276  MachineBasicBlock::const_iterator Beg) {
277  assert(I != Beg && "reached the top of the region, cannot decrement");
278  while (--I != Beg) {
279  if (!I->isDebugInstr())
280  break;
281  }
282  return I;
283 }
284 
285 /// Non-const version.
291 }
292 
293 /// If this iterator is a debug value, increment until reaching the End or a
294 /// non-debug instruction.
295 static MachineBasicBlock::const_iterator
296 nextIfDebug(MachineBasicBlock::const_iterator I,
297  MachineBasicBlock::const_iterator End) {
298  for (; I != End; ++I) {
299  if (!I->isDebugInstr())
300  break;
301  }
302  return I;
303 }
304 
305 /// Non-const version.
311 }
312 
313 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
314 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
315  // Select the scheduler, or set the default.
316  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
317  if (Ctor != useDefaultMachineSched)
318  return Ctor(this);
319 
320  // Get the default scheduler set by the target for this function.
321  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
322  if (Scheduler)
323  return Scheduler;
324 
325  // Default to GenericScheduler.
326  return createGenericSchedLive(this);
327 }
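// A minimal sketch of how a scheduler variant can plug into the selection
// logic above: registering a MachineSchedRegistry entry makes it selectable
// with -misched=<name>. The "example-sched" name and factory are illustrative
// only; this sketch simply reuses the stock GenericScheduler strategy, while a
// real variant would supply its own MachineSchedStrategy subclass.
static ScheduleDAGInstrs *createExampleSched(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
}
static MachineSchedRegistry
ExampleSchedRegistry("example-sched", "Illustrative scheduler registration.",
                     createExampleSched);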
328 
329 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
330 /// the caller. We don't have a command line option to override the postRA
331 /// scheduler. The Target must configure it.
332 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
333  // Get the postRA scheduler set by the target for this function.
334  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
335  if (Scheduler)
336  return Scheduler;
337 
338  // Default to GenericScheduler.
339  return createGenericSchedPostRA(this);
340 }
341 
342 /// Top-level MachineScheduler pass driver.
343 ///
344 /// Visit blocks in function order. Divide each block into scheduling regions
345 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
346 /// consistent with the DAG builder, which traverses the interior of the
347 /// scheduling regions bottom-up.
348 ///
349 /// This design avoids exposing scheduling boundaries to the DAG builder,
350 /// simplifying the DAG builder's support for "special" target instructions.
351 /// At the same time the design allows target schedulers to operate across
352 /// scheduling boundaries, for example to bundle the boundary instructions
353 /// without reordering them. This creates complexity, because the target
354 /// scheduler must update the RegionBegin and RegionEnd positions cached by
355 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
356 /// design would be to split blocks at scheduling boundaries, but LLVM has a
357 /// general bias against block splitting purely for implementation simplicity.
358 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
359  if (skipFunction(mf.getFunction()))
360  return false;
361 
362  if (EnableMachineSched.getNumOccurrences()) {
363  if (!EnableMachineSched)
364  return false;
365  } else if (!mf.getSubtarget().enableMachineScheduler())
366  return false;
367 
368  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
369 
370  // Initialize the context of the pass.
371  MF = &mf;
372  MLI = &getAnalysis<MachineLoopInfo>();
373  MDT = &getAnalysis<MachineDominatorTree>();
374  PassConfig = &getAnalysis<TargetPassConfig>();
375  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
376 
377  LIS = &getAnalysis<LiveIntervals>();
378 
379  if (VerifyScheduling) {
380  LLVM_DEBUG(LIS->dump());
381  MF->verify(this, "Before machine scheduling.");
382  }
383  RegClassInfo->runOnMachineFunction(*MF);
384 
385  // Instantiate the selected scheduler for this target, function, and
386  // optimization level.
387  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
388  scheduleRegions(*Scheduler, false);
389 
390  LLVM_DEBUG(LIS->dump());
391  if (VerifyScheduling)
392  MF->verify(this, "After machine scheduling.");
393  return true;
394 }
395 
396 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
397  if (skipFunction(mf.getFunction()))
398  return false;
399 
400  if (EnablePostRAMachineSched.getNumOccurrences()) {
401  if (!EnablePostRAMachineSched)
402  return false;
403  } else if (!mf.getSubtarget().enablePostRAScheduler()) {
404  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
405  return false;
406  }
407  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
408 
409  // Initialize the context of the pass.
410  MF = &mf;
411  MLI = &getAnalysis<MachineLoopInfo>();
412  PassConfig = &getAnalysis<TargetPassConfig>();
413 
414  if (VerifyScheduling)
415  MF->verify(this, "Before post machine scheduling.");
416 
417  // Instantiate the selected scheduler for this target, function, and
418  // optimization level.
419  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
420  scheduleRegions(*Scheduler, true);
421 
422  if (VerifyScheduling)
423  MF->verify(this, "After post machine scheduling.");
424  return true;
425 }
426 
427 /// Return true if the given instruction should not be included in a scheduling
428 /// region.
429 ///
430 /// MachineScheduler does not currently support scheduling across calls. To
431 /// handle calls, the DAG builder needs to be modified to create register
432 /// anti/output dependencies on the registers clobbered by the call's regmask
433 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
434 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
435 /// the boundary, but there would be no benefit to postRA scheduling across
436 /// calls this late anyway.
437 static bool isSchedBoundary(MachineInstr *MI,
438  MachineBasicBlock *MBB,
439  MachineFunction *MF,
440  const TargetInstrInfo *TII) {
441  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
442 }
443 
444 /// A region of an MBB for scheduling.
445 namespace {
446 struct SchedRegion {
447  /// RegionBegin is the first instruction in the scheduling region, and
448  /// RegionEnd is either MBB->end() or the scheduling boundary after the
449  /// last instruction in the scheduling region. These iterators cannot refer
450  /// to instructions outside of the identified scheduling region because
451  /// those may be reordered before scheduling this region.
452  MachineBasicBlock::iterator RegionBegin;
453  MachineBasicBlock::iterator RegionEnd;
454  unsigned NumRegionInstrs;
455 
456  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
457  unsigned N) :
458  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
459 };
460 } // end anonymous namespace
461 
462 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
463 
464 static void
465 getSchedRegions(MachineBasicBlock *MBB,
466  MBBRegionsVector &Regions,
467  bool RegionsTopDown) {
468  MachineFunction *MF = MBB->getParent();
469  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
470 
471  MachineBasicBlock::iterator I = nullptr;
472  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
473  RegionEnd != MBB->begin(); RegionEnd = I) {
474 
475  // Avoid decrementing RegionEnd for blocks with no terminator.
476  if (RegionEnd != MBB->end() ||
477  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
478  --RegionEnd;
479  }
480 
481  // The next region starts above the previous region. Look backward in the
482  // instruction stream until we find the nearest boundary.
483  unsigned NumRegionInstrs = 0;
484  I = RegionEnd;
485  for (;I != MBB->begin(); --I) {
486  MachineInstr &MI = *std::prev(I);
487  if (isSchedBoundary(&MI, &*MBB, MF, TII))
488  break;
489  if (!MI.isDebugInstr()) {
490  // MBB::size() uses instr_iterator to count. Here we need a bundle to
491  // count as a single instruction.
492  ++NumRegionInstrs;
493  }
494  }
495 
496  // It's possible we found a scheduling region that only has debug
497  // instructions. Don't bother scheduling these.
498  if (NumRegionInstrs != 0)
499  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
500  }
501 
502  if (RegionsTopDown)
503  std::reverse(Regions.begin(), Regions.end());
504 }
505 
506 /// Main driver for both MachineScheduler and PostMachineScheduler.
507 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
508  bool FixKillFlags) {
509  // Visit all machine basic blocks.
510  //
511  // TODO: Visit blocks in global postorder or postorder within the bottom-up
512  // loop tree. Then we can optionally compute global RegPressure.
513  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
514  MBB != MBBEnd; ++MBB) {
515 
516  Scheduler.startBlock(&*MBB);
517 
518 #ifndef NDEBUG
519  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
520  continue;
521  if (SchedOnlyBlock.getNumOccurrences()
522  && (int)SchedOnlyBlock != MBB->getNumber())
523  continue;
524 #endif
525 
526  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
527  // points to the scheduling boundary at the bottom of the region. The DAG
528  // does not include RegionEnd, but the region does (i.e. the next
529  // RegionEnd is above the previous RegionBegin). If the current block has
530  // no terminator then RegionEnd == MBB->end() for the bottom region.
531  //
532  // All the regions of MBB are first found and stored in MBBRegions, which
533  // will be processed top-down when doMBBSchedRegionsTopDown() returns true.
534  //
535  // The Scheduler may insert instructions during either schedule() or
536  // exitRegion(), even for empty regions. So the local iterators 'I' and
537  // 'RegionEnd' are invalid across these calls. Instructions must not be
538  // added to other regions than the current one without updating MBBRegions.
539 
540  MBBRegionsVector MBBRegions;
541  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
542  for (MBBRegionsVector::iterator R = MBBRegions.begin();
543  R != MBBRegions.end(); ++R) {
544  MachineBasicBlock::iterator I = R->RegionBegin;
545  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
546  unsigned NumRegionInstrs = R->NumRegionInstrs;
547 
548  // Notify the scheduler of the region, even if we may skip scheduling
549  // it. Perhaps it still needs to be bundled.
550  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
551 
552  // Skip empty scheduling regions (0 or 1 schedulable instructions).
553  if (I == RegionEnd || I == std::prev(RegionEnd)) {
554  // Close the current region. Bundle the terminator if needed.
555  // This invalidates 'RegionEnd' and 'I'.
556  Scheduler.exitRegion();
557  continue;
558  }
559  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
560  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
561  << " " << MBB->getName() << "\n From: " << *I
562  << " To: ";
563  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
564  else dbgs() << "End";
565  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
566  if (DumpCriticalPathLength) {
567  errs() << MF->getName();
568  errs() << ":%bb. " << MBB->getNumber();
569  errs() << " " << MBB->getName() << " \n";
570  }
571 
572  // Schedule a region: possibly reorder instructions.
573  // This invalidates the original region iterators.
574  Scheduler.schedule();
575 
576  // Close the current region.
577  Scheduler.exitRegion();
578  }
579  Scheduler.finishBlock();
580  // FIXME: Ideally, no further passes should rely on kill flags. However,
581  // thumb2 size reduction is currently an exception, so the PostMIScheduler
582  // needs to do this.
583  if (FixKillFlags)
584  Scheduler.fixupKills(*MBB);
585  }
586  Scheduler.finalizeSchedule();
587 }
588 
589 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
590  // unimplemented
591 }
592 
593 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
594 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
595  dbgs() << "Queue " << Name << ": ";
596  for (const SUnit *SU : Queue)
597  dbgs() << SU->NodeNum << " ";
598  dbgs() << "\n";
599 }
600 #endif
601 
602 //===----------------------------------------------------------------------===//
603 // ScheduleDAGMI - Basic machine instruction scheduling. This is
604 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
605 // virtual registers.
606 //===----------------------------------------------------------------------===//
607 
608 // Provide a vtable anchor.
609 ScheduleDAGMI::~ScheduleDAGMI() = default;
610 
611 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
612 /// NumPredsLeft reaches zero, release the successor node.
613 ///
614 /// FIXME: Adjust SuccSU height based on MinLatency.
615 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
616  SUnit *SuccSU = SuccEdge->getSUnit();
617 
618  if (SuccEdge->isWeak()) {
619  --SuccSU->WeakPredsLeft;
620  if (SuccEdge->isCluster())
621  NextClusterSucc = SuccSU;
622  return;
623  }
624 #ifndef NDEBUG
625  if (SuccSU->NumPredsLeft == 0) {
626  dbgs() << "*** Scheduling failed! ***\n";
627  dumpNode(*SuccSU);
628  dbgs() << " has been released too many times!\n";
629  llvm_unreachable(nullptr);
630  }
631 #endif
632  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
633  // CurrCycle may have advanced since then.
634  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
635  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
636 
637  --SuccSU->NumPredsLeft;
638  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
639  SchedImpl->releaseTopNode(SuccSU);
640 }
641 
642 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
643 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
644  for (SDep &Succ : SU->Succs)
645  releaseSucc(SU, &Succ);
646 }
647 
648 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
649 /// NumSuccsLeft reaches zero, release the predecessor node.
650 ///
651 /// FIXME: Adjust PredSU height based on MinLatency.
652 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
653  SUnit *PredSU = PredEdge->getSUnit();
654 
655  if (PredEdge->isWeak()) {
656  --PredSU->WeakSuccsLeft;
657  if (PredEdge->isCluster())
658  NextClusterPred = PredSU;
659  return;
660  }
661 #ifndef NDEBUG
662  if (PredSU->NumSuccsLeft == 0) {
663  dbgs() << "*** Scheduling failed! ***\n";
664  dumpNode(*PredSU);
665  dbgs() << " has been released too many times!\n";
666  llvm_unreachable(nullptr);
667  }
668 #endif
669  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
670  // CurrCycle may have advanced since then.
671  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
672  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
673 
674  --PredSU->NumSuccsLeft;
675  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
676  SchedImpl->releaseBottomNode(PredSU);
677 }
678 
679 /// releasePredecessors - Call releasePred on each of SU's predecessors.
680 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
681  for (SDep &Pred : SU->Preds)
682  releasePred(SU, &Pred);
683 }
684 
687  SchedImpl->enterMBB(bb);
688 }
689 
691  SchedImpl->leaveMBB();
693 }
694 
695 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
696 /// crossing a scheduling boundary. [begin, end) includes all instructions in
697 /// the region, including the boundary itself and single-instruction regions
698 /// that don't get scheduled.
699 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
700  MachineBasicBlock::iterator begin,
701  MachineBasicBlock::iterator end,
702  unsigned regioninstrs)
703 {
704  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
705 
706  SchedImpl->initPolicy(begin, end, regioninstrs);
707 }
708 
709 /// This is normally called from the main scheduler loop but may also be invoked
710 /// by the scheduling strategy to perform additional code motion.
711 void ScheduleDAGMI::moveInstruction(
712  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
713  // Advance RegionBegin if the first instruction moves down.
714  if (&*RegionBegin == MI)
715  ++RegionBegin;
716 
717  // Update the instruction stream.
718  BB->splice(InsertPos, BB, MI);
719 
720  // Update LiveIntervals
721  if (LIS)
722  LIS->handleMove(*MI, /*UpdateFlags=*/true);
723 
724  // Recede RegionBegin if an instruction moves above the first.
725  if (RegionBegin == InsertPos)
726  RegionBegin = MI;
727 }
728 
729 bool ScheduleDAGMI::checkSchedLimit() {
730 #ifndef NDEBUG
731  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
732  CurrentTop = CurrentBottom;
733  return false;
734  }
735  ++NumInstrsScheduled;
736 #endif
737  return true;
738 }
739 
740 /// Per-region scheduling driver, called back from
741 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
742 /// does not consider liveness or register pressure. It is useful for PostRA
743 /// scheduling and potentially other custom schedulers.
744 void ScheduleDAGMI::schedule() {
745  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
746  LLVM_DEBUG(SchedImpl->dumpPolicy());
747 
748  // Build the DAG.
749  buildSchedGraph(AA);
750 
751  postprocessDAG();
752 
753  SmallVector<SUnit*, 8> TopRoots, BotRoots;
754  findRootsAndBiasEdges(TopRoots, BotRoots);
755 
756  LLVM_DEBUG(dump());
757  if (PrintDAGs) dump();
758  if (ViewMISchedDAGs) viewGraph();
759 
760  // Initialize the strategy before modifying the DAG.
761  // This may initialize a DFSResult to be used for queue priority.
762  SchedImpl->initialize(this);
763 
764  // Initialize ready queues now that the DAG and priority data are finalized.
765  initQueues(TopRoots, BotRoots);
766 
767  bool IsTopNode = false;
768  while (true) {
769  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
770  SUnit *SU = SchedImpl->pickNode(IsTopNode);
771  if (!SU) break;
772 
773  assert(!SU->isScheduled && "Node already scheduled");
774  if (!checkSchedLimit())
775  break;
776 
777  MachineInstr *MI = SU->getInstr();
778  if (IsTopNode) {
779  assert(SU->isTopReady() && "node still has unscheduled dependencies");
780  if (&*CurrentTop == MI)
781  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
782  else
783  moveInstruction(MI, CurrentTop);
784  } else {
785  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
786  MachineBasicBlock::iterator priorII =
787  priorNonDebug(CurrentBottom, CurrentTop);
788  if (&*priorII == MI)
789  CurrentBottom = priorII;
790  else {
791  if (&*CurrentTop == MI)
792  CurrentTop = nextIfDebug(++CurrentTop, priorII);
793  moveInstruction(MI, CurrentBottom);
794  CurrentBottom = MI;
795  }
796  }
797  // Notify the scheduling strategy before updating the DAG.
798  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
799  // runs, it can then use the accurate ReadyCycle time to determine whether
800  // newly released nodes can move to the readyQ.
801  SchedImpl->schedNode(SU, IsTopNode);
802 
803  updateQueues(SU, IsTopNode);
804  }
805  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
806 
807  placeDebugValues();
808 
809  LLVM_DEBUG({
810  dbgs() << "*** Final schedule for "
811  << printMBBReference(*begin()->getParent()) << " ***\n";
812  dumpSchedule();
813  dbgs() << '\n';
814  });
815 }
816 
817 /// Apply each ScheduleDAGMutation step in order.
818 void ScheduleDAGMI::postprocessDAG() {
819  for (auto &m : Mutations)
820  m->apply(this);
821 }
822 
823 void ScheduleDAGMI::
824 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
825  SmallVectorImpl<SUnit*> &BotRoots) {
826  for (SUnit &SU : SUnits) {
827  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
828 
829  // Order predecessors so DFSResult follows the critical path.
830  SU.biasCriticalPath();
831 
832  // A SUnit is ready to top schedule if it has no predecessors.
833  if (!SU.NumPredsLeft)
834  TopRoots.push_back(&SU);
835  // A SUnit is ready to bottom schedule if it has no successors.
836  if (!SU.NumSuccsLeft)
837  BotRoots.push_back(&SU);
838  }
839  ExitSU.biasCriticalPath();
840 }
841 
842 /// Identify DAG roots and setup scheduler queues.
843 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
844  ArrayRef<SUnit*> BotRoots) {
845  NextClusterSucc = nullptr;
846  NextClusterPred = nullptr;
847 
848  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
849  //
850  // Nodes with unreleased weak edges can still be roots.
851  // Release top roots in forward order.
852  for (SUnit *SU : TopRoots)
853  SchedImpl->releaseTopNode(SU);
854 
855  // Release bottom roots in reverse order so the higher priority nodes appear
856  // first. This is more natural and slightly more efficient.
857  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
858  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
859  SchedImpl->releaseBottomNode(*I);
860  }
861 
862  releaseSuccessors(&EntrySU);
863  releasePredecessors(&ExitSU);
864 
865  SchedImpl->registerRoots();
866 
867  // Advance past initial DebugValues.
868  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
869  CurrentBottom = RegionEnd;
870 }
871 
872 /// Update scheduler queues after scheduling an instruction.
873 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
874  // Release dependent instructions for scheduling.
875  if (IsTopNode)
876  releaseSuccessors(SU);
877  else
878  releasePredecessors(SU);
879 
880  SU->isScheduled = true;
881 }
882 
883 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
884 void ScheduleDAGMI::placeDebugValues() {
885  // If first instruction was a DBG_VALUE then put it back.
886  if (FirstDbgValue) {
887  BB->splice(RegionBegin, BB, FirstDbgValue);
888  RegionBegin = FirstDbgValue;
889  }
890 
891  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
892  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
893  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
894  MachineInstr *DbgValue = P.first;
895  MachineBasicBlock::iterator OrigPrevMI = P.second;
896  if (&*RegionBegin == DbgValue)
897  ++RegionBegin;
898  BB->splice(++OrigPrevMI, BB, DbgValue);
899  if (OrigPrevMI == std::prev(RegionEnd))
900  RegionEnd = DbgValue;
901  }
902  DbgValues.clear();
903  FirstDbgValue = nullptr;
904 }
905 
906 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
907 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
908  for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
909  if (SUnit *SU = getSUnit(&(*MI)))
910  dumpNode(*SU);
911  else
912  dbgs() << "Missing SUnit\n";
913  }
914 }
915 #endif
916 
917 //===----------------------------------------------------------------------===//
918 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
919 // preservation.
920 //===----------------------------------------------------------------------===//
921 
922 ScheduleDAGMILive::~ScheduleDAGMILive() {
923  delete DFSResult;
924 }
925 
926 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
927  const MachineInstr &MI = *SU.getInstr();
928  for (const MachineOperand &MO : MI.operands()) {
929  if (!MO.isReg())
930  continue;
931  if (!MO.readsReg())
932  continue;
933  if (TrackLaneMasks && !MO.isUse())
934  continue;
935 
936  unsigned Reg = MO.getReg();
937  if (!TargetRegisterInfo::isVirtualRegister(Reg))
938  continue;
939 
940  // Ignore re-defs.
941  if (TrackLaneMasks) {
942  bool FoundDef = false;
943  for (const MachineOperand &MO2 : MI.operands()) {
944  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
945  FoundDef = true;
946  break;
947  }
948  }
949  if (FoundDef)
950  continue;
951  }
952 
953  // Record this local VReg use.
954  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
955  for (; UI != VRegUses.end(); ++UI) {
956  if (UI->SU == &SU)
957  break;
958  }
959  if (UI == VRegUses.end())
960  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
961  }
962 }
963 
964 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
965 /// crossing a scheduling boundary. [begin, end) includes all instructions in
966 /// the region, including the boundary itself and single-instruction regions
967 /// that don't get scheduled.
968 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
969  MachineBasicBlock::iterator begin,
970  MachineBasicBlock::iterator end,
971  unsigned regioninstrs)
972 {
973  // ScheduleDAGMI initializes SchedImpl's per-region policy.
974  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
975 
976  // For convenience remember the end of the liveness region.
977  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
978 
979  SUPressureDiffs.clear();
980 
981  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
982  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
983 
984  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
985  "ShouldTrackLaneMasks requires ShouldTrackPressure");
986 }
987 
988 // Set up the register pressure trackers for the top and bottom scheduled
989 // regions.
990 void ScheduleDAGMILive::initRegPressure() {
991  VRegUses.clear();
992  VRegUses.setUniverse(MRI.getNumVirtRegs());
993  for (SUnit &SU : SUnits)
994  collectVRegUses(SU);
995 
996  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
997  ShouldTrackLaneMasks, false);
998  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
999  ShouldTrackLaneMasks, false);
1000 
1001  // Close the RPTracker to finalize live ins.
1002  RPTracker.closeRegion();
1003 
1004  LLVM_DEBUG(RPTracker.dump());
1005 
1006  // Initialize the live ins and live outs.
1007  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1008  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1009 
1010  // Close one end of the tracker so we can call
1011  // getMaxUpward/DownwardPressureDelta before advancing across any
1012  // instructions. This converts currently live regs into live ins/outs.
1013  TopRPTracker.closeTop();
1014  BotRPTracker.closeBottom();
1015 
1016  BotRPTracker.initLiveThru(RPTracker);
1017  if (!BotRPTracker.getLiveThru().empty()) {
1018  TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
1019  LLVM_DEBUG(dbgs() << "Live Thru: ";
1020  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1021  };
1022 
1023  // For each live out vreg reduce the pressure change associated with other
1024  // uses of the same vreg below the live-out reaching def.
1025  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1026 
1027  // Account for liveness generated by the region boundary.
1028  if (LiveRegionEnd != RegionEnd) {
1029  SmallVector<RegisterMaskPair, 8> LiveUses;
1030  BotRPTracker.recede(&LiveUses);
1031  updatePressureDiffs(LiveUses);
1032  }
1033 
1034  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1035  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1036  dbgs() << "Bottom Pressure:\n";
1037  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1038 
1039  assert((BotRPTracker.getPos() == RegionEnd ||
1040  (RegionEnd->isDebugInstr() &&
1041  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1042  "Can't find the region bottom");
1043 
1044  // Cache the list of excess pressure sets in this region. This will also track
1045  // the max pressure in the scheduled code for these sets.
1046  RegionCriticalPSets.clear();
1047  const std::vector<unsigned> &RegionPressure =
1048  RPTracker.getPressure().MaxSetPressure;
1049  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1050  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1051  if (RegionPressure[i] > Limit) {
1052  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1053  << " Actual " << RegionPressure[i] << "\n");
1054  RegionCriticalPSets.push_back(PressureChange(i));
1055  }
1056  }
1057  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1058  for (const PressureChange &RCPS
1059  : RegionCriticalPSets) dbgs()
1060  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1061  dbgs() << "\n");
1062 }
1063 
1064 void ScheduleDAGMILive::
1065 updateScheduledPressure(const SUnit *SU,
1066  const std::vector<unsigned> &NewMaxPressure) {
1067  const PressureDiff &PDiff = getPressureDiff(SU);
1068  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1069  for (const PressureChange &PC : PDiff) {
1070  if (!PC.isValid())
1071  break;
1072  unsigned ID = PC.getPSet();
1073  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1074  ++CritIdx;
1075  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1076  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1077  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1078  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1079  }
1080  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1081  if (NewMaxPressure[ID] >= Limit - 2) {
1082  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1083  << NewMaxPressure[ID]
1084  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1085  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1086  << " livethru)\n");
1087  }
1088  }
1089 }
1090 
1091 /// Update the PressureDiff array for liveness after scheduling this
1092 /// instruction.
1093 void ScheduleDAGMILive::updatePressureDiffs(
1094  ArrayRef<RegisterMaskPair> LiveUses) {
1095  for (const RegisterMaskPair &P : LiveUses) {
1096  unsigned Reg = P.RegUnit;
1097  /// FIXME: Currently assuming single-use physregs.
1098  if (!TRI->isVirtualRegister(Reg))
1099  continue;
1100 
1101  if (ShouldTrackLaneMasks) {
1102  // If the register has just become live then other uses won't change
1103  // this fact anymore => decrement pressure.
1104  // If the register has just become dead then other uses make it come
1105  // back to life => increment pressure.
1106  bool Decrement = P.LaneMask.any();
1107 
1108  for (const VReg2SUnit &V2SU
1109  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1110  SUnit &SU = *V2SU.SU;
1111  if (SU.isScheduled || &SU == &ExitSU)
1112  continue;
1113 
1114  PressureDiff &PDiff = getPressureDiff(&SU);
1115  PDiff.addPressureChange(Reg, Decrement, &MRI);
1116  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1117  << printReg(Reg, TRI) << ':'
1118  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1119  dbgs() << " to "; PDiff.dump(*TRI););
1120  }
1121  } else {
1122  assert(P.LaneMask.any());
1123  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1124  // This may be called before CurrentBottom has been initialized. However,
1125  // BotRPTracker must have a valid position. We want the value live into the
1126  // instruction or live out of the block, so ask for the previous
1127  // instruction's live-out.
1128  const LiveInterval &LI = LIS->getInterval(Reg);
1129  VNInfo *VNI;
1130  MachineBasicBlock::const_iterator I =
1131  nextIfDebug(BotRPTracker.getPos(), BB->end());
1132  if (I == BB->end())
1133  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1134  else {
1135  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1136  VNI = LRQ.valueIn();
1137  }
1138  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1139  assert(VNI && "No live value at use.");
1140  for (const VReg2SUnit &V2SU
1141  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1142  SUnit *SU = V2SU.SU;
1143  // If this use comes before the reaching def, it cannot be a last use,
1144  // so decrease its pressure change.
1145  if (!SU->isScheduled && SU != &ExitSU) {
1146  LiveQueryResult LRQ =
1147  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1148  if (LRQ.valueIn() == VNI) {
1149  PressureDiff &PDiff = getPressureDiff(SU);
1150  PDiff.addPressureChange(Reg, true, &MRI);
1151  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1152  << *SU->getInstr();
1153  dbgs() << " to "; PDiff.dump(*TRI););
1154  }
1155  }
1156  }
1157  }
1158  }
1159 }
1160 
1161 void ScheduleDAGMILive::dump() const {
1162 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1163  if (EntrySU.getInstr() != nullptr)
1164  dumpNodeAll(EntrySU);
1165  for (const SUnit &SU : SUnits) {
1166  dumpNodeAll(SU);
1167  if (ShouldTrackPressure) {
1168  dbgs() << " Pressure Diff : ";
1169  getPressureDiff(&SU).dump(*TRI);
1170  }
1171  dbgs() << " Single Issue : ";
1172  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1173  SchedModel.mustEndGroup(SU.getInstr()))
1174  dbgs() << "true;";
1175  else
1176  dbgs() << "false;";
1177  dbgs() << '\n';
1178  }
1179  if (ExitSU.getInstr() != nullptr)
1180  dumpNodeAll(ExitSU);
1181 #endif
1182 }
1183 
1184 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1185 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1186 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1187 ///
1188 /// This is a skeletal driver, with all the functionality pushed into helpers,
1189 /// so that it can be easily extended by experimental schedulers. Generally,
1190 /// implementing MachineSchedStrategy should be sufficient to implement a new
1191 /// scheduling algorithm. However, if a scheduler further subclasses
1192 /// ScheduleDAGMILive then it will want to override this virtual method in order
1193 /// to update any specialized state.
1194 void ScheduleDAGMILive::schedule() {
1195  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1196  LLVM_DEBUG(SchedImpl->dumpPolicy());
1197  buildDAGWithRegPressure();
1198 
1199  postprocessDAG();
1200 
1201  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1202  findRootsAndBiasEdges(TopRoots, BotRoots);
1203 
1204  // Initialize the strategy before modifying the DAG.
1205  // This may initialize a DFSResult to be used for queue priority.
1206  SchedImpl->initialize(this);
1207 
1208  LLVM_DEBUG(dump());
1209  if (PrintDAGs) dump();
1210  if (ViewMISchedDAGs) viewGraph();
1211 
1212  // Initialize ready queues now that the DAG and priority data are finalized.
1213  initQueues(TopRoots, BotRoots);
1214 
1215  bool IsTopNode = false;
1216  while (true) {
1217  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1218  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1219  if (!SU) break;
1220 
1221  assert(!SU->isScheduled && "Node already scheduled");
1222  if (!checkSchedLimit())
1223  break;
1224 
1225  scheduleMI(SU, IsTopNode);
1226 
1227  if (DFSResult) {
1228  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1229  if (!ScheduledTrees.test(SubtreeID)) {
1230  ScheduledTrees.set(SubtreeID);
1231  DFSResult->scheduleTree(SubtreeID);
1232  SchedImpl->scheduleTree(SubtreeID);
1233  }
1234  }
1235 
1236  // Notify the scheduling strategy after updating the DAG.
1237  SchedImpl->schedNode(SU, IsTopNode);
1238 
1239  updateQueues(SU, IsTopNode);
1240  }
1241  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1242 
1243  placeDebugValues();
1244 
1245  LLVM_DEBUG({
1246  dbgs() << "*** Final schedule for "
1247  << printMBBReference(*begin()->getParent()) << " ***\n";
1248  dumpSchedule();
1249  dbgs() << '\n';
1250  });
1251 }
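// A minimal sketch of the MachineSchedStrategy interface that the driver
// above relies on (illustrative only; the names below are hypothetical). A
// strategy only has to hand back nodes whose dependencies have already been
// released; this toy version schedules bottom-up in ready order.
namespace {
struct FIFOBottomUpStrategy : public MachineSchedStrategy {
  std::vector<SUnit *> ReadyQ; // nodes whose successors are all scheduled

  void initialize(ScheduleDAGMI *) override { ReadyQ.clear(); }

  // Called from initQueues()/updateQueues() as dependencies are released.
  void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
  void releaseTopNode(SUnit *) override {} // bottom-up only

  SUnit *pickNode(bool &IsTopNode) override {
    IsTopNode = false;
    if (ReadyQ.empty())
      return nullptr; // a null node terminates the driver loop above
    SUnit *SU = ReadyQ.front();
    ReadyQ.erase(ReadyQ.begin());
    return SU;
  }

  void schedNode(SUnit *, bool) override {}
};
} // end anonymous namespace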
1252 
1253 /// Build the DAG and setup three register pressure trackers.
1254 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1255  if (!ShouldTrackPressure) {
1256  RPTracker.reset();
1257  RegionCriticalPSets.clear();
1258  buildSchedGraph(AA);
1259  return;
1260  }
1261 
1262  // Initialize the register pressure tracker used by buildSchedGraph.
1263  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1264  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1265 
1266  // Account for liveness generated by the region boundary.
1267  if (LiveRegionEnd != RegionEnd)
1268  RPTracker.recede();
1269 
1270  // Build the DAG, and compute current register pressure.
1271  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1272 
1273  // Initialize top/bottom trackers after computing region pressure.
1274  initRegPressure();
1275 }
1276 
1277 void ScheduleDAGMILive::computeDFSResult() {
1278  if (!DFSResult)
1279  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1280  DFSResult->clear();
1281  ScheduledTrees.clear();
1282  DFSResult->resize(SUnits.size());
1283  DFSResult->compute(SUnits);
1284  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1285 }
1286 
1287 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1288 /// only provides the critical path for single block loops. To handle loops that
1289 /// span blocks, we could use the vreg path latencies provided by
1290 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1291 /// available for use in the scheduler.
1292 ///
1293 /// The cyclic path estimation identifies a def-use pair that crosses the back
1294 /// edge and considers the depth and height of the nodes. For example, consider
1295 /// the following instruction sequence where each instruction has unit latency
1296 /// and defines an eponymous virtual register:
1297 ///
1298 /// a->b(a,c)->c(b)->d(c)->exit
1299 ///
1300 /// The cyclic critical path is two cycles: b->c->b
1301 /// The acyclic critical path is four cycles: a->b->c->d->exit
1302 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1303 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1304 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1305 /// LiveInDepth = depth(b) = len(a->b) = 1
1306 ///
1307 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1308 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1309 /// CyclicCriticalPath = min(2, 2) = 2
1310 ///
1311 /// This could be relevant to PostRA scheduling, but is currently implemented
1312 /// assuming LiveIntervals.
1313 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1314  // This only applies to single-block loops.
1315  if (!BB->isSuccessor(BB))
1316  return 0;
1317 
1318  unsigned MaxCyclicLatency = 0;
1319  // Visit each live out vreg def to find def/use pairs that cross iterations.
1320  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1321  unsigned Reg = P.RegUnit;
1322  if (!TRI->isVirtualRegister(Reg))
1323  continue;
1324  const LiveInterval &LI = LIS->getInterval(Reg);
1325  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1326  if (!DefVNI)
1327  continue;
1328 
1329  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1330  const SUnit *DefSU = getSUnit(DefMI);
1331  if (!DefSU)
1332  continue;
1333 
1334  unsigned LiveOutHeight = DefSU->getHeight();
1335  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1336  // Visit all local users of the vreg def.
1337  for (const VReg2SUnit &V2SU
1338  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1339  SUnit *SU = V2SU.SU;
1340  if (SU == &ExitSU)
1341  continue;
1342 
1343  // Only consider uses of the phi.
1344  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1345  if (!LRQ.valueIn()->isPHIDef())
1346  continue;
1347 
1348  // Assume that a path spanning two iterations is a cycle, which could
1349  // overestimate in strange cases. This allows cyclic latency to be
1350  // estimated as the minimum slack of the vreg's depth or height.
1351  unsigned CyclicLatency = 0;
1352  if (LiveOutDepth > SU->getDepth())
1353  CyclicLatency = LiveOutDepth - SU->getDepth();
1354 
1355  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1356  if (LiveInHeight > LiveOutHeight) {
1357  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1358  CyclicLatency = LiveInHeight - LiveOutHeight;
1359  } else
1360  CyclicLatency = 0;
1361 
1362  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1363  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1364  if (CyclicLatency > MaxCyclicLatency)
1365  MaxCyclicLatency = CyclicLatency;
1366  }
1367  }
1368  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1369  return MaxCyclicLatency;
1370 }
1371 
1372 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1373 /// the Top RP tracker in case the region beginning has changed.
1374 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1375  ArrayRef<SUnit*> BotRoots) {
1376  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1377  if (ShouldTrackPressure) {
1378  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1379  TopRPTracker.setPos(CurrentTop);
1380  }
1381 }
1382 
1383 /// Move an instruction and update register pressure.
1384 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1385  // Move the instruction to its new location in the instruction stream.
1386  MachineInstr *MI = SU->getInstr();
1387 
1388  if (IsTopNode) {
1389  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1390  if (&*CurrentTop == MI)
1391  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1392  else {
1393  moveInstruction(MI, CurrentTop);
1394  TopRPTracker.setPos(MI);
1395  }
1396 
1397  if (ShouldTrackPressure) {
1398  // Update top scheduled pressure.
1399  RegisterOperands RegOpers;
1400  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1401  if (ShouldTrackLaneMasks) {
1402  // Adjust liveness and add missing dead+read-undef flags.
1403  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1404  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1405  } else {
1406  // Adjust for missing dead-def flags.
1407  RegOpers.detectDeadDefs(*MI, *LIS);
1408  }
1409 
1410  TopRPTracker.advance(RegOpers);
1411  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1412  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1413  TopRPTracker.getRegSetPressureAtPos(), TRI););
1414 
1415  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1416  }
1417  } else {
1418  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1419  MachineBasicBlock::iterator priorII =
1420  priorNonDebug(CurrentBottom, CurrentTop);
1421  if (&*priorII == MI)
1422  CurrentBottom = priorII;
1423  else {
1424  if (&*CurrentTop == MI) {
1425  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1426  TopRPTracker.setPos(CurrentTop);
1427  }
1428  moveInstruction(MI, CurrentBottom);
1429  CurrentBottom = MI;
1430  BotRPTracker.setPos(CurrentBottom);
1431  }
1432  if (ShouldTrackPressure) {
1433  RegisterOperands RegOpers;
1434  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1435  if (ShouldTrackLaneMasks) {
1436  // Adjust liveness and add missing dead+read-undef flags.
1437  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1438  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1439  } else {
1440  // Adjust for missing dead-def flags.
1441  RegOpers.detectDeadDefs(*MI, *LIS);
1442  }
1443 
1444  if (BotRPTracker.getPos() != CurrentBottom)
1445  BotRPTracker.recedeSkipDebugValues();
1446  SmallVector<RegisterMaskPair, 8> LiveUses;
1447  BotRPTracker.recede(RegOpers, &LiveUses);
1448  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1449  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1450  BotRPTracker.getRegSetPressureAtPos(), TRI););
1451 
1452  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1453  updatePressureDiffs(LiveUses);
1454  }
1455  }
1456 }
1457 
1458 //===----------------------------------------------------------------------===//
1459 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1460 //===----------------------------------------------------------------------===//
1461 
1462 namespace {
1463 
1464 /// Post-process the DAG to create cluster edges between neighboring
1465 /// loads or between neighboring stores.
1466 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1467  struct MemOpInfo {
1468  SUnit *SU;
1469  const MachineOperand *BaseOp;
1470  int64_t Offset;
1471 
1472  MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
1473  : SU(su), BaseOp(Op), Offset(ofs) {}
1474 
1475  bool operator<(const MemOpInfo &RHS) const {
1476  if (BaseOp->getType() != RHS.BaseOp->getType())
1477  return BaseOp->getType() < RHS.BaseOp->getType();
1478 
1479  if (BaseOp->isReg())
1480  return std::make_tuple(BaseOp->getReg(), Offset, SU->NodeNum) <
1481  std::make_tuple(RHS.BaseOp->getReg(), RHS.Offset,
1482  RHS.SU->NodeNum);
1483  if (BaseOp->isFI()) {
1484  const MachineFunction &MF =
1485  *BaseOp->getParent()->getParent()->getParent();
1486  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1487  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1488  TargetFrameLowering::StackGrowsDown;
1489  // Can't use tuple comparison here since we might need to use a
1490  // different order when the stack grows down.
1491  if (BaseOp->getIndex() != RHS.BaseOp->getIndex())
1492  return StackGrowsDown ? BaseOp->getIndex() > RHS.BaseOp->getIndex()
1493  : BaseOp->getIndex() < RHS.BaseOp->getIndex();
1494 
1495  if (Offset != RHS.Offset)
1496  return StackGrowsDown ? Offset > RHS.Offset : Offset < RHS.Offset;
1497 
1498  return SU->NodeNum < RHS.SU->NodeNum;
1499  }
1500 
1501  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1502  "index bases.");
1503  }
1504  };
1505 
1506  const TargetInstrInfo *TII;
1507  const TargetRegisterInfo *TRI;
1508  bool IsLoad;
1509 
1510 public:
1511  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1512  const TargetRegisterInfo *tri, bool IsLoad)
1513  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1514 
1515  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1516 
1517 protected:
1518  void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
1519 };
1520 
1521 class StoreClusterMutation : public BaseMemOpClusterMutation {
1522 public:
1523  StoreClusterMutation(const TargetInstrInfo *tii,
1524  const TargetRegisterInfo *tri)
1525  : BaseMemOpClusterMutation(tii, tri, false) {}
1526 };
1527 
1528 class LoadClusterMutation : public BaseMemOpClusterMutation {
1529 public:
1530  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1531  : BaseMemOpClusterMutation(tii, tri, true) {}
1532 };
1533 
1534 } // end anonymous namespace
1535 
1536 namespace llvm {
1537 
1538 std::unique_ptr<ScheduleDAGMutation>
1539 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1540  const TargetRegisterInfo *TRI) {
1541  return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
1542  : nullptr;
1543 }
1544 
1545 std::unique_ptr<ScheduleDAGMutation>
1546 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1547  const TargetRegisterInfo *TRI) {
1548  return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
1549  : nullptr;
1550 }
1551 
1552 } // end namespace llvm
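// A minimal sketch of how these factories are typically consumed: a target's
// createMachineScheduler() hook builds the generic live DAG and attaches the
// clustering mutations. The wrapper name below is hypothetical; the factories
// return nullptr (which addMutation ignores) when -misched-cluster is off.
static ScheduleDAGInstrs *createClusteringSchedExample(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}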
1553 
1554 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1555  ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
1556  SmallVector<MemOpInfo, 32> MemOpRecords;
1557  for (SUnit *SU : MemOps) {
1558  const MachineOperand *BaseOp;
1559  int64_t Offset;
1560  if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
1561  MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
1562  }
1563  if (MemOpRecords.size() < 2)
1564  return;
1565 
1566  llvm::sort(MemOpRecords);
1567  unsigned ClusterLength = 1;
1568  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1569  SUnit *SUa = MemOpRecords[Idx].SU;
1570  SUnit *SUb = MemOpRecords[Idx+1].SU;
1571  if (TII->shouldClusterMemOps(*MemOpRecords[Idx].BaseOp,
1572  *MemOpRecords[Idx + 1].BaseOp,
1573  ClusterLength) &&
1574  DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
1575  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1576  << SUb->NodeNum << ")\n");
1577  // Copy successor edges from SUa to SUb. Interleaving computation
1578  // dependent on SUa can prevent load combining due to register reuse.
1579  // Predecessor edges do not need to be copied from SUb to SUa since nearby
1580  // loads should have effectively the same inputs.
1581  for (const SDep &Succ : SUa->Succs) {
1582  if (Succ.getSUnit() == SUb)
1583  continue;
1584  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1585  << ")\n");
1586  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1587  }
1588  ++ClusterLength;
1589  } else
1590  ClusterLength = 1;
1591  }
1592 }
1593 
1594 /// Callback from DAG postProcessing to create cluster edges for loads.
1595 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1596  // Map DAG NodeNum to store chain ID.
1597  DenseMap<unsigned, unsigned> StoreChainIDs;
1598  // Map each store chain to a set of dependent MemOps.
1599  SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
1600  for (SUnit &SU : DAG->SUnits) {
1601  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1602  (!IsLoad && !SU.getInstr()->mayStore()))
1603  continue;
1604 
1605  unsigned ChainPredID = DAG->SUnits.size();
1606  for (const SDep &Pred : SU.Preds) {
1607  if (Pred.isCtrl()) {
1608  ChainPredID = Pred.getSUnit()->NodeNum;
1609  break;
1610  }
1611  }
1612  // Check if this chain-like pred has been seen
1613  // before. ChainPredID==MaxNodeID at the top of the schedule.
1614  unsigned NumChains = StoreChainDependents.size();
1615  std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
1616  StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
1617  if (Result.second)
1618  StoreChainDependents.resize(NumChains + 1);
1619  StoreChainDependents[Result.first->second].push_back(&SU);
1620  }
1621 
1622  // Iterate over the store chains.
1623  for (auto &SCD : StoreChainDependents)
1624  clusterNeighboringMemOps(SCD, DAG);
1625 }
1626 
1627 //===----------------------------------------------------------------------===//
1628 // CopyConstrain - DAG post-processing to encourage copy elimination.
1629 //===----------------------------------------------------------------------===//
1630 
1631 namespace {
1632 
1633 /// Post-process the DAG to create weak edges from all uses of a copy to
1634 /// the one use that defines the copy's source vreg, most likely an induction
1635 /// variable increment.
1636 class CopyConstrain : public ScheduleDAGMutation {
1637  // Transient state.
1638  SlotIndex RegionBeginIdx;
1639 
1640  // RegionEndIdx is the slot index of the last non-debug instruction in the
1641  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1642  SlotIndex RegionEndIdx;
1643 
1644 public:
1645  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1646 
1647  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1648 
1649 protected:
1650  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1651 };
1652 
1653 } // end anonymous namespace
1654 
1655 namespace llvm {
1656 
1657 std::unique_ptr<ScheduleDAGMutation>
1658 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1659  const TargetRegisterInfo *TRI) {
1660  return llvm::make_unique<CopyConstrain>(TII, TRI);
1661 }
1662 
1663 } // end namespace llvm
1664 
1665 /// constrainLocalCopy handles two possibilities:
1666 /// 1) Local src:
1667 /// I0: = dst
1668 /// I1: src = ...
1669 /// I2: = dst
1670 /// I3: dst = src (copy)
1671 /// (create pred->succ edges I0->I1, I2->I1)
1672 ///
1673 /// 2) Local copy:
1674 /// I0: dst = src (copy)
1675 /// I1: = dst
1676 /// I2: src = ...
1677 /// I3: = dst
1678 /// (create pred->succ edges I1->I2, I3->I2)
1679 ///
1680 /// Although the MachineScheduler is currently constrained to single blocks,
1681 /// this algorithm should handle extended blocks. An EBB is a set of
1682 /// contiguously numbered blocks such that the previous block in the EBB is
1683 /// always the single predecessor.
1684 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1685  LiveIntervals *LIS = DAG->getLIS();
1686  MachineInstr *Copy = CopySU->getInstr();
1687 
1688  // Check for pure vreg copies.
1689  const MachineOperand &SrcOp = Copy->getOperand(1);
1690  unsigned SrcReg = SrcOp.getReg();
1691  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1692  return;
1693 
1694  const MachineOperand &DstOp = Copy->getOperand(0);
1695  unsigned DstReg = DstOp.getReg();
1696  if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
1697  return;
1698 
1699  // Check if either the dest or source is local. If it's live across a back
1700  // edge, it's not local. Note that if both vregs are live across the back
1701  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1702  // If both the copy's source and dest are local live intervals, then we
1703  // should treat the dest as the global for the purpose of adding
1704  // constraints. This adds edges from source's other uses to the copy.
1705  unsigned LocalReg = SrcReg;
1706  unsigned GlobalReg = DstReg;
1707  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1708  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1709  LocalReg = DstReg;
1710  GlobalReg = SrcReg;
1711  LocalLI = &LIS->getInterval(LocalReg);
1712  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1713  return;
1714  }
1715  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1716 
1717  // Find the global segment after the start of the local LI.
1718  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1719  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1720  // local live range. We could create edges from other global uses to the local
1721  // start, but the coalescer should have already eliminated these cases, so
1722  // don't bother dealing with it.
1723  if (GlobalSegment == GlobalLI->end())
1724  return;
1725 
1726  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1727  // returned the next global segment. But if GlobalSegment overlaps with
1728  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1729  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1730  if (GlobalSegment->contains(LocalLI->beginIndex()))
1731  ++GlobalSegment;
1732 
1733  if (GlobalSegment == GlobalLI->end())
1734  return;
1735 
1736  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1737  if (GlobalSegment != GlobalLI->begin()) {
1738  // Two address defs have no hole.
1739  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1740  GlobalSegment->start)) {
1741  return;
1742  }
1743  // If the prior global segment may be defined by the same two-address
1744  // instruction that also defines LocalLI, then we can't make a hole here.
1745  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1746  LocalLI->beginIndex())) {
1747  return;
1748  }
1749  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1750  // it would be a disconnected component in the live range.
1751  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1752  "Disconnected LRG within the scheduling region.");
1753  }
1754  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1755  if (!GlobalDef)
1756  return;
1757 
1758  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1759  if (!GlobalSU)
1760  return;
1761 
1762  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1763  // constraining the uses of the last local def to precede GlobalDef.
1764  SmallVector<SUnit*,8> LocalUses;
1765  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1766  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1767  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1768  for (const SDep &Succ : LastLocalSU->Succs) {
1769  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1770  continue;
1771  if (Succ.getSUnit() == GlobalSU)
1772  continue;
1773  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1774  return;
1775  LocalUses.push_back(Succ.getSUnit());
1776  }
1777  // Open the top of the GlobalLI hole by constraining any earlier global uses
1778  // to precede the start of LocalLI.
1779  SmallVector<SUnit*,8> GlobalUses;
1780  MachineInstr *FirstLocalDef =
1781  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1782  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1783  for (const SDep &Pred : GlobalSU->Preds) {
1784  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1785  continue;
1786  if (Pred.getSUnit() == FirstLocalSU)
1787  continue;
1788  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1789  return;
1790  GlobalUses.push_back(Pred.getSUnit());
1791  }
1792  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1793  // Add the weak edges.
1794  for (SmallVectorImpl<SUnit*>::const_iterator
1795  I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
1796  LLVM_DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU("
1797  << GlobalSU->NodeNum << ")\n");
1798  DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
1799  }
1800  for (SmallVectorImpl<SUnit*>::const_iterator
1801  I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
1802  LLVM_DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU("
1803  << FirstLocalSU->NodeNum << ")\n");
1804  DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
1805  }
1806 }
1807 
1808 /// Callback from DAG postProcessing to create weak edges to encourage
1809 /// copy elimination.
1810 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1811  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1812  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1813 
1814  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1815  if (FirstPos == DAG->end())
1816  return;
1817  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1818  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1819  *priorNonDebug(DAG->end(), DAG->begin()));
1820 
1821  for (SUnit &SU : DAG->SUnits) {
1822  if (!SU.getInstr()->isCopy())
1823  continue;
1824 
1825  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1826  }
1827 }
1828 
1829 //===----------------------------------------------------------------------===//
1830 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1831 // and possibly other custom schedulers.
1832 //===----------------------------------------------------------------------===//
1833 
1834 static const unsigned InvalidCycle = ~0U;
1835 
1836 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1837 
1838 /// Given a Count of resource usage and a Latency value, return true if a
1839 /// SchedBoundary becomes resource limited.
1840 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1841  unsigned Latency) {
1842  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
1843 }
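// Worked example (added for clarity, not in the original): with a latency
// factor LFactor = 2, a zone that has consumed Count = 9 scaled resource
// units against Latency = 3 scheduled cycles gives 9 - 3*2 = 3 > 2, so the
// zone counts as resource limited; with Count = 7 the margin is 1 <= 2 and
// the zone remains latency limited.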
1844 
1845 void SchedBoundary::reset() {
1846  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1847  // Destroying and reconstructing it is very expensive though. So keep
1848  // invalid, placeholder HazardRecs.
1849  if (HazardRec && HazardRec->isEnabled()) {
1850  delete HazardRec;
1851  HazardRec = nullptr;
1852  }
1853  Available.clear();
1854  Pending.clear();
1855  CheckPending = false;
1856  CurrCycle = 0;
1857  CurrMOps = 0;
1858  MinReadyCycle = std::numeric_limits<unsigned>::max();
1859  ExpectedLatency = 0;
1860  DependentLatency = 0;
1861  RetiredMOps = 0;
1862  MaxExecutedResCount = 0;
1863  ZoneCritResIdx = 0;
1864  IsResourceLimited = false;
1865  ReservedCycles.clear();
1866  ReservedCyclesIndex.clear();
1867 #ifndef NDEBUG
1868  // Track the maximum number of stall cycles that could arise either from the
1869  // latency of a DAG edge or the number of cycles that a processor resource is
1870  // reserved (SchedBoundary::ReservedCycles).
1871  MaxObservedStall = 0;
1872 #endif
1873  // Reserve a zero-count for invalid CritResIdx.
1874  ExecutedResCounts.resize(1);
1875  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
1876 }
1877 
1878 void SchedRemainder::
1879 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
1880  reset();
1881  if (!SchedModel->hasInstrSchedModel())
1882  return;
1883  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
1884  for (SUnit &SU : DAG->SUnits) {
1885  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
1886  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
1887  * SchedModel->getMicroOpFactor();
1888  for (TargetSchedModel::ProcResIter
1889  PI = SchedModel->getWriteProcResBegin(SC),
1890  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
1891  unsigned PIdx = PI->ProcResourceIdx;
1892  unsigned Factor = SchedModel->getResourceFactor(PIdx);
1893  RemainingCounts[PIdx] += (Factor * PI->Cycles);
1894  }
1895  }
1896 }
1897 
1898 void SchedBoundary::
1899 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
1900  reset();
1901  DAG = dag;
1902  SchedModel = smodel;
1903  Rem = rem;
1904  if (SchedModel->hasInstrSchedModel()) {
1905  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1906  ReservedCyclesIndex.resize(ResourceCount);
1907  ExecutedResCounts.resize(ResourceCount);
1908  unsigned NumUnits = 0;
1909 
1910  for (unsigned i = 0; i < ResourceCount; ++i) {
1911  ReservedCyclesIndex[i] = NumUnits;
1912  NumUnits += SchedModel->getProcResource(i)->NumUnits;
1913  }
1914 
1915  ReservedCycles.resize(NumUnits, InvalidCycle);
1916  }
1917 }
1918 
1919 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
1920 /// these "soft stalls" differently than the hard stall cycles based on CPU
1921 /// resources and computed by checkHazard(). A fully in-order model
1922 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
1923 /// available for scheduling until they are ready. However, a weaker in-order
1924 /// model may use this for heuristics. For example, if a processor has in-order
1925 /// behavior when reading certain resources, this may come into play.
1926 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
1927  if (!SU->isUnbuffered)
1928  return 0;
1929 
1930  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
1931  if (ReadyCycle > CurrCycle)
1932  return ReadyCycle - CurrCycle;
1933  return 0;
1934 }
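// Worked example (added for clarity, not in the original): an unbuffered SU
// whose TopReadyCycle is 7 while the top zone's CurrCycle is 4 reports
// 7 - 4 = 3 soft stall cycles; buffered SUs always report 0 here.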
1935 
1936 /// Compute the next cycle at which the given processor resource unit
1937 /// can be scheduled.
1938 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1939  unsigned Cycles) {
1940  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
1941  // If this resource has never been used, always return cycle zero.
1942  if (NextUnreserved == InvalidCycle)
1943  return 0;
1944  // For bottom-up scheduling add the cycles needed for the current operation.
1945  if (!isTop())
1946  NextUnreserved += Cycles;
1947  return NextUnreserved;
1948 }
1949 
1950 /// Compute the next cycle at which the given processor resource can be
1951 /// scheduled. Returns the next cycle and the index of the processor resource
1952 /// instance in the reserved cycles vector.
1953 std::pair<unsigned, unsigned>
1954 SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1955  unsigned MinNextUnreserved = InvalidCycle;
1956  unsigned InstanceIdx = 0;
1957  unsigned StartIndex = ReservedCyclesIndex[PIdx];
1958  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1959  assert(NumberOfInstances > 0 &&
1960  "Cannot have zero instances of a ProcResource");
1961 
1962  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1963  ++I) {
1964  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1965  if (MinNextUnreserved > NextUnreserved) {
1966  InstanceIdx = I;
1967  MinNextUnreserved = NextUnreserved;
1968  }
1969  }
1970  return std::make_pair(MinNextUnreserved, InstanceIdx);
1971 }
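// Worked example (added for clarity, not in the original): for a resource
// kind with two units whose ReservedCycles entries are {12, InvalidCycle},
// the never-used unit wins with cycle 0; if both entries read {12, 9} and the
// zone is bottom-up with Cycles = 2, the result is {9 + 2 = 11, second unit}.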
1972 
1973 /// Does this SU have a hazard within the current instruction group.
1974 ///
1975 /// The scheduler supports two modes of hazard recognition. The first is the
1976 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
1977 /// supports highly complicated in-order reservation tables
1978 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
1979 ///
1980 /// The second is a streamlined mechanism that checks for hazards based on
1981 /// simple counters that the scheduler itself maintains. It explicitly checks
1982 /// for instruction dispatch limitations, including the number of micro-ops that
1983 /// can dispatch per cycle.
1984 ///
1985 /// TODO: Also check whether the SU must start a new group.
1986 bool SchedBoundary::checkHazard(SUnit *SU) {
1987  if (HazardRec->isEnabled()
1988  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
1989  return true;
1990  }
1991 
1992  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
1993  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
1994  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
1995  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
1996  return true;
1997  }
1998 
1999  if (CurrMOps > 0 &&
2000  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2001  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2002  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2003  << (isTop() ? "begin" : "end") << " group\n");
2004  return true;
2005  }
2006 
2007  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2008  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2009  for (const MCWriteProcResEntry &PE :
2010  make_range(SchedModel->getWriteProcResBegin(SC),
2011  SchedModel->getWriteProcResEnd(SC))) {
2012  unsigned ResIdx = PE.ProcResourceIdx;
2013  unsigned Cycles = PE.Cycles;
2014  unsigned NRCycle, InstanceIdx;
2015  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
2016  if (NRCycle > CurrCycle) {
2017 #ifndef NDEBUG
2018  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2019 #endif
2020  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2021  << SchedModel->getResourceName(ResIdx)
2022  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2023  << "=" << NRCycle << "c\n");
2024  return true;
2025  }
2026  }
2027  }
2028  return false;
2029 }
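// Worked example (added for clarity, not in the original): with an issue
// width of 4 and CurrMOps = 3 micro-ops already issued this cycle, an SU that
// decodes to 2 micro-ops trips the second check (3 + 2 > 4) and is reported
// as a hazard, so releaseNode() parks it on Pending rather than Available.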
2030 
2031 // Find the unscheduled node in ReadySUs with the highest latency.
2032 unsigned SchedBoundary::
2033 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2034  SUnit *LateSU = nullptr;
2035  unsigned RemLatency = 0;
2036  for (SUnit *SU : ReadySUs) {
2037  unsigned L = getUnscheduledLatency(SU);
2038  if (L > RemLatency) {
2039  RemLatency = L;
2040  LateSU = SU;
2041  }
2042  }
2043  if (LateSU) {
2044  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2045  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2046  }
2047  return RemLatency;
2048 }
2049 
2050 // Count resources in this zone and the remaining unscheduled
2051 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2052 // resource index, or zero if the zone is issue limited.
2053 unsigned SchedBoundary::
2054 getOtherResourceCount(unsigned &OtherCritIdx) {
2055  OtherCritIdx = 0;
2056  if (!SchedModel->hasInstrSchedModel())
2057  return 0;
2058 
2059  unsigned OtherCritCount = Rem->RemIssueCount
2060  + (RetiredMOps * SchedModel->getMicroOpFactor());
2061  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2062  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2063  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2064  PIdx != PEnd; ++PIdx) {
2065  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2066  if (OtherCount > OtherCritCount) {
2067  OtherCritCount = OtherCount;
2068  OtherCritIdx = PIdx;
2069  }
2070  }
2071  if (OtherCritIdx) {
2072  LLVM_DEBUG(
2073  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2074  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2075  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2076  }
2077  return OtherCritCount;
2078 }
2079 
2080 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
2081  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2082 
2083 #ifndef NDEBUG
2084  // ReadyCycle had been bumped up to the CurrCycle when this node was
2085  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2086  // scheduling, so may now be greater than ReadyCycle.
2087  if (ReadyCycle > CurrCycle)
2088  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2089 #endif
2090 
2091  if (ReadyCycle < MinReadyCycle)
2092  MinReadyCycle = ReadyCycle;
2093 
2094  // Check for interlocks first. For the purpose of other heuristics, an
2095  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2096  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2097  if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
2098  Available.size() >= ReadyListLimit)
2099  Pending.push(SU);
2100  else
2101  Available.push(SU);
2102 }
2103 
2104 /// Move the boundary of scheduled code by one cycle.
2105 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2106  if (SchedModel->getMicroOpBufferSize() == 0) {
2107  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2108  "MinReadyCycle uninitialized");
2109  if (MinReadyCycle > NextCycle)
2110  NextCycle = MinReadyCycle;
2111  }
2112  // Update the current micro-ops, which will issue in the next cycle.
2113  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2114  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2115 
2116  // Decrement DependentLatency based on the next cycle.
2117  if ((NextCycle - CurrCycle) > DependentLatency)
2118  DependentLatency = 0;
2119  else
2120  DependentLatency -= (NextCycle - CurrCycle);
2121 
2122  if (!HazardRec->isEnabled()) {
2123  // Bypass HazardRec virtual calls.
2124  CurrCycle = NextCycle;
2125  } else {
2126  // Bypass getHazardType calls in case of long latency.
2127  for (; CurrCycle != NextCycle; ++CurrCycle) {
2128  if (isTop())
2129  HazardRec->AdvanceCycle();
2130  else
2131  HazardRec->RecedeCycle();
2132  }
2133  }
2134  CheckPending = true;
2135  IsResourceLimited =
2136  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2137  getScheduledLatency());
2138 
2139  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2140  << '\n');
2141 }
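// Worked example (added for clarity, not in the original): advancing the top
// zone from cycle 3 to NextCycle = 5 with an issue width of 4 retires
// 2 * 4 = 8 micro-op slots from CurrMOps and subtracts 2 cycles from
// DependentLatency (flooring at 0); with the hazard recognizer enabled the
// loop calls AdvanceCycle() twice instead of jumping CurrCycle directly.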
2142 
2143 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2144  ExecutedResCounts[PIdx] += Count;
2145  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2146  MaxExecutedResCount = ExecutedResCounts[PIdx];
2147 }
2148 
2149 /// Add the given processor resource to this scheduled zone.
2150 ///
2151 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2152 /// during which this resource is consumed.
2153 ///
2154 /// \return the next cycle at which the instruction may execute without
2155 /// oversubscribing resources.
2156 unsigned SchedBoundary::
2157 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
2158  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2159  unsigned Count = Factor * Cycles;
2160  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2161  << Cycles << "x" << Factor << "u\n");
2162 
2163  // Update Executed resources counts.
2164  incExecutedResources(PIdx, Count);
2165  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2166  Rem->RemainingCounts[PIdx] -= Count;
2167 
2168  // Check if this resource exceeds the current critical resource. If so, it
2169  // becomes the critical resource.
2170  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2171  ZoneCritResIdx = PIdx;
2172  LLVM_DEBUG(dbgs() << " *** Critical resource "
2173  << SchedModel->getResourceName(PIdx) << ": "
2174  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2175  << "c\n");
2176  }
2177  // For reserved resources, record the highest cycle using the resource.
2178  unsigned NextAvailable, InstanceIdx;
2179  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
2180  if (NextAvailable > CurrCycle) {
2181  LLVM_DEBUG(dbgs() << " Resource conflict: "
2182  << SchedModel->getResourceName(PIdx)
2183  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2184  << " reserved until @" << NextAvailable << "\n");
2185  }
2186  return NextAvailable;
2187 }
2188 
2189 /// Move the boundary of scheduled code by one SUnit.
2190 void SchedBoundary::bumpNode(SUnit *SU) {
2191  // Update the reservation table.
2192  if (HazardRec->isEnabled()) {
2193  if (!isTop() && SU->isCall) {
2194  // Calls are scheduled with their preceding instructions. For bottom-up
2195  // scheduling, clear the pipeline state before emitting.
2196  HazardRec->Reset();
2197  }
2198  HazardRec->EmitInstruction(SU);
2199  // Scheduling an instruction may have made pending instructions available.
2200  CheckPending = true;
2201  }
2202  // checkHazard should prevent scheduling multiple instructions per cycle that
2203  // exceed the issue width.
2204  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2205  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2206  assert(
2207  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2208  "Cannot schedule this instruction's MicroOps in the current cycle.");
2209 
2210  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2211  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2212 
2213  unsigned NextCycle = CurrCycle;
2214  switch (SchedModel->getMicroOpBufferSize()) {
2215  case 0:
2216  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2217  break;
2218  case 1:
2219  if (ReadyCycle > NextCycle) {
2220  NextCycle = ReadyCycle;
2221  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2222  }
2223  break;
2224  default:
2225  // We don't currently model the OOO reorder buffer, so consider all
2226  // scheduled MOps to be "retired". We do loosely model in-order resource
2227  // latency. If this instruction uses an in-order resource, account for any
2228  // likely stall cycles.
2229  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2230  NextCycle = ReadyCycle;
2231  break;
2232  }
2233  RetiredMOps += IncMOps;
2234 
2235  // Update resource counts and critical resource.
2236  if (SchedModel->hasInstrSchedModel()) {
2237  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2238  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2239  Rem->RemIssueCount -= DecRemIssue;
2240  if (ZoneCritResIdx) {
2241  // Scale scheduled micro-ops for comparing with the critical resource.
2242  unsigned ScaledMOps =
2243  RetiredMOps * SchedModel->getMicroOpFactor();
2244 
2245  // If scaled micro-ops are now more than the previous critical resource by
2246  // a full cycle, then micro-ops issue becomes critical.
2247  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2248  >= (int)SchedModel->getLatencyFactor()) {
2249  ZoneCritResIdx = 0;
2250  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2251  << ScaledMOps / SchedModel->getLatencyFactor()
2252  << "c\n");
2253  }
2254  }
2255  for (TargetSchedModel::ProcResIter
2256  PI = SchedModel->getWriteProcResBegin(SC),
2257  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2258  unsigned RCycle =
2259  countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
2260  if (RCycle > NextCycle)
2261  NextCycle = RCycle;
2262  }
2263  if (SU->hasReservedResource) {
2264  // For reserved resources, record the highest cycle using the resource.
2265  // For top-down scheduling, this is the cycle in which we schedule this
2266  // instruction plus the number of cycles the operation reserves the
2267  // resource. For bottom-up it is simply the instruction's cycle.
2268  for (TargetSchedModel::ProcResIter
2269  PI = SchedModel->getWriteProcResBegin(SC),
2270  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2271  unsigned PIdx = PI->ProcResourceIdx;
2272  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2273  unsigned ReservedUntil, InstanceIdx;
2274  std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
2275  if (isTop()) {
2276  ReservedCycles[InstanceIdx] =
2277  std::max(ReservedUntil, NextCycle + PI->Cycles);
2278  } else
2279  ReservedCycles[InstanceIdx] = NextCycle;
2280  }
2281  }
2282  }
2283  }
2284  // Update ExpectedLatency and DependentLatency.
2285  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2286  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2287  if (SU->getDepth() > TopLatency) {
2288  TopLatency = SU->getDepth();
2289  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2290  << SU->NodeNum << ") " << TopLatency << "c\n");
2291  }
2292  if (SU->getHeight() > BotLatency) {
2293  BotLatency = SU->getHeight();
2294  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2295  << SU->NodeNum << ") " << BotLatency << "c\n");
2296  }
2297  // If we stall for any reason, bump the cycle.
2298  if (NextCycle > CurrCycle)
2299  bumpCycle(NextCycle);
2300  else
2301  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2302  // resource limited. If a stall occurred, bumpCycle does this.
2303  IsResourceLimited =
2304  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2305  getScheduledLatency());
2306 
2307  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2308  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2309  // one cycle. Since we commonly reach the max MOps here, opportunistically
2310  // bump the cycle to avoid uselessly checking everything in the readyQ.
2311  CurrMOps += IncMOps;
2312 
2313  // Bump the cycle count for issue group constraints.
2314  // This must be done after NextCycle has been adjusted for all other stalls.
2315  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2316  // currCycle to X.
2317  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2318  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2319  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2320  << " group\n");
2321  bumpCycle(++NextCycle);
2322  }
2323 
2324  while (CurrMOps >= SchedModel->getIssueWidth()) {
2325  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2326  << CurrCycle << '\n');
2327  bumpCycle(++NextCycle);
2328  }
2329  LLVM_DEBUG(dumpScheduledState());
2330 }
2331 
2332 /// Release pending ready nodes into the available queue. This makes them
2333 /// visible to heuristics.
2334 void SchedBoundary::releasePending() {
2335  // If the available queue is empty, it is safe to reset MinReadyCycle.
2336  if (Available.empty())
2337  MinReadyCycle = std::numeric_limits<unsigned>::max();
2338 
2339  // Check to see if any of the pending instructions are ready to issue. If
2340  // so, add them to the available queue.
2341  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2342  for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
2343  SUnit *SU = *(Pending.begin()+i);
2344  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2345 
2346  if (ReadyCycle < MinReadyCycle)
2347  MinReadyCycle = ReadyCycle;
2348 
2349  if (!IsBuffered && ReadyCycle > CurrCycle)
2350  continue;
2351 
2352  if (checkHazard(SU))
2353  continue;
2354 
2355  if (Available.size() >= ReadyListLimit)
2356  break;
2357 
2358  Available.push(SU);
2359  Pending.remove(Pending.begin()+i);
2360  --i; --e;
2361  }
2362  CheckPending = false;
2363 }
2364 
2365 /// Remove SU from the ready set for this boundary.
2366 void SchedBoundary::removeReady(SUnit *SU) {
2367  if (Available.isInQueue(SU))
2368  Available.remove(Available.find(SU));
2369  else {
2370  assert(Pending.isInQueue(SU) && "bad ready count");
2371  Pending.remove(Pending.find(SU));
2372  }
2373 }
2374 
2375 /// If this queue only has one ready candidate, return it. As a side effect,
2376 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2377 /// one node is ready. If multiple instructions are ready, return NULL.
2378 SUnit *SchedBoundary::pickOnlyChoice() {
2379  if (CheckPending)
2380  releasePending();
2381 
2382  if (CurrMOps > 0) {
2383  // Defer any ready instrs that now have a hazard.
2384  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2385  if (checkHazard(*I)) {
2386  Pending.push(*I);
2387  I = Available.remove(I);
2388  continue;
2389  }
2390  ++I;
2391  }
2392  }
2393  for (unsigned i = 0; Available.empty(); ++i) {
2394 // FIXME: Re-enable assert once PR20057 is resolved.
2395 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2396 // "permanent hazard");
2397  (void)i;
2398  bumpCycle(CurrCycle + 1);
2399  releasePending();
2400  }
2401 
2402  LLVM_DEBUG(Pending.dump());
2403  LLVM_DEBUG(Available.dump());
2404 
2405  if (Available.size() == 1)
2406  return *Available.begin();
2407  return nullptr;
2408 }
2409 
2410 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2411 // This is useful information to dump after bumpNode.
2412 // Note that the Queue contents are more useful before pickNodeFromQueue.
2413 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2414  unsigned ResFactor;
2415  unsigned ResCount;
2416  if (ZoneCritResIdx) {
2417  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2418  ResCount = getResourceCount(ZoneCritResIdx);
2419  } else {
2420  ResFactor = SchedModel->getMicroOpFactor();
2421  ResCount = RetiredMOps * ResFactor;
2422  }
2423  unsigned LFactor = SchedModel->getLatencyFactor();
2424  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2425  << " Retired: " << RetiredMOps;
2426  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2427  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2428  << ResCount / ResFactor << " "
2429  << SchedModel->getResourceName(ZoneCritResIdx)
2430  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2431  << (IsResourceLimited ? " - Resource" : " - Latency")
2432  << " limited.\n";
2433 }
2434 #endif
2435 
2436 //===----------------------------------------------------------------------===//
2437 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2438 //===----------------------------------------------------------------------===//
2439 
2440 void GenericSchedulerBase::SchedCandidate::
2441 initResourceDelta(const ScheduleDAGMI *DAG,
2442  const TargetSchedModel *SchedModel) {
2443  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2444  return;
2445 
2446  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2447  for (TargetSchedModel::ProcResIter
2448  PI = SchedModel->getWriteProcResBegin(SC),
2449  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2450  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2451  ResDelta.CritResources += PI->Cycles;
2452  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2453  ResDelta.DemandedResources += PI->Cycles;
2454  }
2455 }
2456 
2457 /// Compute remaining latency. We need this both to determine whether the
2458 /// overall schedule has become latency-limited and whether the instructions
2459 /// outside this zone are resource or latency limited.
2460 ///
2461 /// The "dependent" latency is updated incrementally during scheduling as the
2462 /// max height/depth of scheduled nodes minus the cycles since it was
2463 /// scheduled:
2464 /// DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
2465 ///
2466 /// The "independent" latency is the max ready queue depth:
2467 /// ILat = max N.depth for N in Available|Pending
2468 ///
2469 /// RemainingLatency is the greater of independent and dependent latency.
2470 ///
2471 /// These computations are expensive, especially in DAGs with many edges, so
2472 /// only do them if necessary.
2473 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2474  unsigned RemLatency = CurrZone.getDependentLatency();
2475  RemLatency = std::max(RemLatency,
2476  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2477  RemLatency = std::max(RemLatency,
2478  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2479  return RemLatency;
2480 }
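// Worked example (added for clarity, not in the original): if the zone's
// DependentLatency is 4 and the deepest unscheduled nodes in Available and
// Pending report latencies of 7 and 5, RemainingLatency is max(4, 7, 5) = 7.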
2481 
2482 /// Returns true if the current cycle plus remaining latency is greater than
2483 /// the critical path in the scheduling region.
2484 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2485  SchedBoundary &CurrZone,
2486  bool ComputeRemLatency,
2487  unsigned &RemLatency) const {
2488  // The current cycle is already greater than the critical path, so we are
2489  // already latency limited and don't need to compute the remaining latency.
2490  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2491  return true;
2492 
2493  // If we haven't scheduled anything yet, then we aren't latency limited.
2494  if (CurrZone.getCurrCycle() == 0)
2495  return false;
2496 
2497  if (ComputeRemLatency)
2498  RemLatency = computeRemLatency(CurrZone);
2499 
2500  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2501 }
2502 
2503 /// Set the CandPolicy given a scheduling zone given the current resources and
2504 /// latencies inside and outside the zone.
2505 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2506  SchedBoundary &CurrZone,
2507  SchedBoundary *OtherZone) {
2508  // Apply preemptive heuristics based on the total latency and resources
2509  // inside and outside this zone. Potential stalls should be considered before
2510  // following this policy.
2511 
2512  // Compute the critical resource outside the zone.
2513  unsigned OtherCritIdx = 0;
2514  unsigned OtherCount =
2515  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2516 
2517  bool OtherResLimited = false;
2518  unsigned RemLatency = 0;
2519  bool RemLatencyComputed = false;
2520  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2521  RemLatency = computeRemLatency(CurrZone);
2522  RemLatencyComputed = true;
2523  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2524  OtherCount, RemLatency);
2525  }
2526 
2527  // Schedule aggressively for latency in PostRA mode. We don't check for
2528  // acyclic latency during PostRA, and highly out-of-order processors will
2529  // skip PostRA scheduling.
2530  if (!OtherResLimited &&
2531  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2532  RemLatency))) {
2533  Policy.ReduceLatency |= true;
2534  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2535  << " RemainingLatency " << RemLatency << " + "
2536  << CurrZone.getCurrCycle() << "c > CritPath "
2537  << Rem.CriticalPath << "\n");
2538  }
2539  // If the same resource is limiting inside and outside the zone, do nothing.
2540  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2541  return;
2542 
2543  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2544  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2545  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2546  } if (OtherResLimited) dbgs()
2547  << " RemainingLimit: "
2548  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2549  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2550  << " Latency limited both directions.\n");
2551 
2552  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2553  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2554 
2555  if (OtherResLimited)
2556  Policy.DemandResIdx = OtherCritIdx;
2557 }
2558 
2559 #ifndef NDEBUG
2560 const char *GenericSchedulerBase::getReasonStr(
2561  GenericSchedulerBase::CandReason Reason) {
2562  switch (Reason) {
2563  case NoCand: return "NOCAND ";
2564  case Only1: return "ONLY1 ";
2565  case PhysReg: return "PHYS-REG ";
2566  case RegExcess: return "REG-EXCESS";
2567  case RegCritical: return "REG-CRIT ";
2568  case Stall: return "STALL ";
2569  case Cluster: return "CLUSTER ";
2570  case Weak: return "WEAK ";
2571  case RegMax: return "REG-MAX ";
2572  case ResourceReduce: return "RES-REDUCE";
2573  case ResourceDemand: return "RES-DEMAND";
2574  case TopDepthReduce: return "TOP-DEPTH ";
2575  case TopPathReduce: return "TOP-PATH ";
2576  case BotHeightReduce:return "BOT-HEIGHT";
2577  case BotPathReduce: return "BOT-PATH ";
2578  case NextDefUse: return "DEF-USE ";
2579  case NodeOrder: return "ORDER ";
2580  };
2581  llvm_unreachable("Unknown reason!");
2582 }
2583 
2584 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2585  PressureChange P;
2586  unsigned ResIdx = 0;
2587  unsigned Latency = 0;
2588  switch (Cand.Reason) {
2589  default:
2590  break;
2591  case RegExcess:
2592  P = Cand.RPDelta.Excess;
2593  break;
2594  case RegCritical:
2595  P = Cand.RPDelta.CriticalMax;
2596  break;
2597  case RegMax:
2598  P = Cand.RPDelta.CurrentMax;
2599  break;
2600  case ResourceReduce:
2601  ResIdx = Cand.Policy.ReduceResIdx;
2602  break;
2603  case ResourceDemand:
2604  ResIdx = Cand.Policy.DemandResIdx;
2605  break;
2606  case TopDepthReduce:
2607  Latency = Cand.SU->getDepth();
2608  break;
2609  case TopPathReduce:
2610  Latency = Cand.SU->getHeight();
2611  break;
2612  case BotHeightReduce:
2613  Latency = Cand.SU->getHeight();
2614  break;
2615  case BotPathReduce:
2616  Latency = Cand.SU->getDepth();
2617  break;
2618  }
2619  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2620  if (P.isValid())
2621  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2622  << ":" << P.getUnitInc() << " ";
2623  else
2624  dbgs() << " ";
2625  if (ResIdx)
2626  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2627  else
2628  dbgs() << " ";
2629  if (Latency)
2630  dbgs() << " " << Latency << " cycles ";
2631  else
2632  dbgs() << " ";
2633  dbgs() << '\n';
2634 }
2635 #endif
2636 
2637 namespace llvm {
2638 /// Return true if this heuristic determines order.
2639 bool tryLess(int TryVal, int CandVal,
2640  GenericSchedulerBase::SchedCandidate &TryCand,
2641  GenericSchedulerBase::SchedCandidate &Cand,
2642  GenericSchedulerBase::CandReason Reason) {
2643  if (TryVal < CandVal) {
2644  TryCand.Reason = Reason;
2645  return true;
2646  }
2647  if (TryVal > CandVal) {
2648  if (Cand.Reason > Reason)
2649  Cand.Reason = Reason;
2650  return true;
2651  }
2652  return false;
2653 }
2654 
2655 bool tryGreater(int TryVal, int CandVal,
2656  GenericSchedulerBase::SchedCandidate &TryCand,
2657  GenericSchedulerBase::SchedCandidate &Cand,
2658  GenericSchedulerBase::CandReason Reason) {
2659  if (TryVal > CandVal) {
2660  TryCand.Reason = Reason;
2661  return true;
2662  }
2663  if (TryVal < CandVal) {
2664  if (Cand.Reason > Reason)
2665  Cand.Reason = Reason;
2666  return true;
2667  }
2668  return false;
2669 }
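// Note (added for clarity, not in the original): both helpers implement one
// step of the lexicographic comparison used by tryCandidate(). A strict win
// for TryVal stamps TryCand.Reason and ends the comparison; a strict win for
// CandVal only strengthens Cand.Reason (earlier CandReason values represent
// higher-priority heuristics) and also ends the comparison; a tie returns
// false so the caller falls through to the next, weaker heuristic.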
2670 
2671 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2672  GenericSchedulerBase::SchedCandidate &Cand,
2673  SchedBoundary &Zone) {
2674  if (Zone.isTop()) {
2675  if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
2676  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2677  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2678  return true;
2679  }
2680  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2681  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2682  return true;
2683  } else {
2684  if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
2685  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2686  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2687  return true;
2688  }
2689  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2690  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2691  return true;
2692  }
2693  return false;
2694 }
2695 } // end namespace llvm
2696 
2697 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2698  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2699  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2700 }
2701 
2702 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2703  tracePick(Cand.Reason, Cand.AtTop);
2704 }
2705 
2706 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2707  assert(dag->hasVRegLiveness() &&
2708  "(PreRA)GenericScheduler needs vreg liveness");
2709  DAG = static_cast<ScheduleDAGMILive*>(dag);
2710  SchedModel = DAG->getSchedModel();
2711  TRI = DAG->TRI;
2712 
2713  Rem.init(DAG, SchedModel);
2714  Top.init(DAG, SchedModel, &Rem);
2715  Bot.init(DAG, SchedModel, &Rem);
2716 
2717  // Initialize resource counts.
2718 
2719  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2720  // are disabled, then these HazardRecs will be disabled.
2721  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2722  if (!Top.HazardRec) {
2723  Top.HazardRec =
2724  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2725  Itin, DAG);
2726  }
2727  if (!Bot.HazardRec) {
2728  Bot.HazardRec =
2729  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2730  Itin, DAG);
2731  }
2732  TopCand.SU = nullptr;
2733  BotCand.SU = nullptr;
2734 }
2735 
2736 /// Initialize the per-region scheduling policy.
2737 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2738  MachineBasicBlock::iterator End,
2739  unsigned NumRegionInstrs) {
2740  const MachineFunction &MF = *Begin->getMF();
2741  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2742 
2743  // Avoid setting up the register pressure tracker for small regions to save
2744  // compile time. As a rough heuristic, only track pressure when the number of
2745  // schedulable instructions exceeds half the integer register file.
2746  RegionPolicy.ShouldTrackPressure = true;
2747  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2748  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2749  if (TLI->isTypeLegal(LegalIntVT)) {
2750  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2751  TLI->getRegClassFor(LegalIntVT));
2752  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2753  }
2754  }
2755 
2756  // For generic targets, we default to bottom-up, because it's simpler and more
2757  // compile-time optimizations have been implemented in that direction.
2758  RegionPolicy.OnlyBottomUp = true;
2759 
2760  // Allow the subtarget to override default policy.
2761  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2762 
2763  // After subtarget overrides, apply command line options.
2764  if (!EnableRegPressure)
2765  RegionPolicy.ShouldTrackPressure = false;
2766 
2767  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2768  // e.g. -misched-bottomup=false allows scheduling in both directions.
2770  "-misched-topdown incompatible with -misched-bottomup");
2771  if (ForceBottomUp.getNumOccurrences() > 0) {
2772  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2773  if (RegionPolicy.OnlyBottomUp)
2774  RegionPolicy.OnlyTopDown = false;
2775  }
2776  if (ForceTopDown.getNumOccurrences() > 0) {
2777  RegionPolicy.OnlyTopDown = ForceTopDown;
2778  if (RegionPolicy.OnlyTopDown)
2779  RegionPolicy.OnlyBottomUp = false;
2780  }
2781 }
2782 
2783 void GenericScheduler::dumpPolicy() const {
2784  // Cannot completely remove virtual function even in release mode.
2785 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2786  dbgs() << "GenericScheduler RegionPolicy: "
2787  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2788  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2789  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2790  << "\n";
2791 #endif
2792 }
2793 
2794 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2795 /// critical path by more cycles than it takes to drain the instruction buffer.
2796 /// We estimate an upper bound on in-flight instructions as:
2797 ///
2798 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2799 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2800 /// InFlightResources = InFlightIterations * LoopResources
2801 ///
2802 /// TODO: Check execution resources in addition to IssueCount.
2803 void GenericScheduler::checkAcyclicLatency() {
2804  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
2805  return;
2806 
2807  // Scaled number of cycles per loop iteration.
2808  unsigned IterCount =
2809  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
2810  Rem.RemIssueCount);
2811  // Scaled acyclic critical path.
2812  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
2813  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
2814  unsigned InFlightCount =
2815  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
2816  unsigned BufferLimit =
2817  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
2818 
2819  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
2820 
2821  LLVM_DEBUG(
2822  dbgs() << "IssueCycles="
2823  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
2824  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
2825  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
2826  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
2827  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
2828  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
2829 }
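// Worked example (added for clarity, not in the original): with a latency
// factor of 1, CyclicCritPath = 10c, CriticalPath = 40c, RemIssueCount = 20
// scaled micro-ops and MicroOpBufferSize = 16, IterCount = max(10, 20) = 20,
// InFlightCount = ceil(40 * 20 / 20) = 40 and BufferLimit = 16, so the loop
// is flagged as acyclic-latency limited (40 > 16).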
2830 
2831 void GenericScheduler::registerRoots() {
2832  Rem.CriticalPath = DAG->ExitSU.getDepth();
2833 
2834  // Some roots may not feed into ExitSU. Check all of them in case.
2835  for (const SUnit *SU : Bot.Available) {
2836  if (SU->getDepth() > Rem.CriticalPath)
2837  Rem.CriticalPath = SU->getDepth();
2838  }
2839  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
2840  if (DumpCriticalPathLength) {
2841  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
2842  }
2843 
2844  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
2845  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
2846  checkAcyclicLatency();
2847  }
2848 }
2849 
2850 namespace llvm {
2851 bool tryPressure(const PressureChange &TryP,
2852  const PressureChange &CandP,
2853  GenericSchedulerBase::SchedCandidate &TryCand,
2854  GenericSchedulerBase::SchedCandidate &Cand,
2855  GenericSchedulerBase::CandReason Reason,
2856  const TargetRegisterInfo *TRI,
2857  const MachineFunction &MF) {
2858  // If one candidate decreases and the other increases, go with it.
2859  // Invalid candidates have UnitInc==0.
2860  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
2861  Reason)) {
2862  return true;
2863  }
2864  // Do not compare the magnitude of pressure changes between top and bottom
2865  // boundary.
2866  if (Cand.AtTop != TryCand.AtTop)
2867  return false;
2868 
2869  // If both candidates affect the same set in the same boundary, go with the
2870  // smallest increase.
2871  unsigned TryPSet = TryP.getPSetOrMax();
2872  unsigned CandPSet = CandP.getPSetOrMax();
2873  if (TryPSet == CandPSet) {
2874  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
2875  Reason);
2876  }
2877 
2878  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
2879  std::numeric_limits<int>::max();
2880 
2881  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
2882  std::numeric_limits<int>::max();
2883 
2884  // If the candidates are decreasing pressure, reverse priority.
2885  if (TryP.getUnitInc() < 0)
2886  std::swap(TryRank, CandRank);
2887  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
2888 }
2889 
2890 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
2891  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
2892 }
2893 
2894 /// Minimize physical register live ranges. Regalloc wants them adjacent to
2895 /// their physreg def/use.
2896 ///
2897 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
2898 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
2899 /// with the operation that produces or consumes the physreg. We'll do this when
2900 /// regalloc has support for parallel copies.
2901 int biasPhysReg(const SUnit *SU, bool isTop) {
2902  const MachineInstr *MI = SU->getInstr();
2903 
2904  if (MI->isCopy()) {
2905  unsigned ScheduledOper = isTop ? 1 : 0;
2906  unsigned UnscheduledOper = isTop ? 0 : 1;
2907  // If we have already scheduled the physreg produce/consumer, immediately
2908  // schedule the copy.
2909  if (TargetRegisterInfo::isPhysicalRegister(
2910  MI->getOperand(ScheduledOper).getReg()))
2911  return 1;
2912  // If the physreg is at the boundary, defer it. Otherwise schedule it
2913  // immediately to free the dependent. We can hoist the copy later.
2914  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
2915  if (TargetRegisterInfo::isPhysicalRegister(
2916  MI->getOperand(UnscheduledOper).getReg()))
2917  return AtBoundary ? -1 : 1;
2918  }
2919 
2920  if (MI->isMoveImmediate()) {
2921  // If we have a move immediate and all successors have been assigned, bias
2922  // towards scheduling this later. Make sure all register defs are to
2923  // physical registers.
2924  bool DoBias = true;
2925  for (const MachineOperand &Op : MI->defs()) {
2926  if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
2927  DoBias = false;
2928  break;
2929  }
2930  }
2931 
2932  if (DoBias)
2933  return isTop ? -1 : 1;
2934  }
2935 
2936  return 0;
2937 }
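// Illustrative example (added for clarity, not part of the original file):
// when scheduling bottom-up, a COPY whose already-scheduled side (operand 0)
// writes a physical register, say a return value moved into $eax, yields +1,
// so the copy is picked right away and stays adjacent to its physreg
// consumer; a COPY at the region boundary whose unscheduled side reads a
// physreg argument yields -1, deferring it so the live range is not stretched.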
2938 } // end namespace llvm
2939 
2940 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
2941  bool AtTop,
2942  const RegPressureTracker &RPTracker,
2943  RegPressureTracker &TempTracker) {
2944  Cand.SU = SU;
2945  Cand.AtTop = AtTop;
2946  if (DAG->isTrackingPressure()) {
2947  if (AtTop) {
2948  TempTracker.getMaxDownwardPressureDelta(
2949  Cand.SU->getInstr(),
2950  Cand.RPDelta,
2951  DAG->getRegionCriticalPSets(),
2952  DAG->getRegPressure().MaxSetPressure);
2953  } else {
2954  if (VerifyScheduling) {
2955  TempTracker.getMaxUpwardPressureDelta(
2956  Cand.SU->getInstr(),
2957  &DAG->getPressureDiff(Cand.SU),
2958  Cand.RPDelta,
2959  DAG->getRegionCriticalPSets(),
2960  DAG->getRegPressure().MaxSetPressure);
2961  } else {
2962  RPTracker.getUpwardPressureDelta(
2963  Cand.SU->getInstr(),
2964  DAG->getPressureDiff(Cand.SU),
2965  Cand.RPDelta,
2966  DAG->getRegionCriticalPSets(),
2967  DAG->getRegPressure().MaxSetPressure);
2968  }
2969  }
2970  }
2971  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
2972  << " Try SU(" << Cand.SU->NodeNum << ") "
2973  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
2974  << Cand.RPDelta.Excess.getUnitInc() << "\n");
2975 }
2976 
2977 /// Apply a set of heuristics to a new candidate. Heuristics are currently
2978 /// hierarchical. This may be more efficient than a graduated cost model because
2979 /// we don't need to evaluate all aspects of the model for each node in the
2980 /// queue. But it's really done to make the heuristics easier to debug and
2981 /// statistically analyze.
2982 ///
2983 /// \param Cand provides the policy and current best candidate.
2984 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
2985 /// \param Zone describes the scheduled zone that we are extending, or nullptr
2986 // if Cand is from a different zone than TryCand.
2987 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
2988  SchedCandidate &TryCand,
2989  SchedBoundary *Zone) const {
2990  // Initialize the candidate if needed.
2991  if (!Cand.isValid()) {
2992  TryCand.Reason = NodeOrder;
2993  return;
2994  }
2995 
2996  // Bias PhysReg Defs and copies to their uses and defs, respectively.
2997  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
2998  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
2999  return;
3000 
3001  // Avoid exceeding the target's limit.
3002  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3003  Cand.RPDelta.Excess,
3004  TryCand, Cand, RegExcess, TRI,
3005  DAG->MF))
3006  return;
3007 
3008  // Avoid increasing the max critical pressure in the scheduled region.
3009  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3010  Cand.RPDelta.CriticalMax,
3011  TryCand, Cand, RegCritical, TRI,
3012  DAG->MF))
3013  return;
3014 
3015  // We only compare a subset of features when comparing nodes between
3016  // Top and Bottom boundary. Some properties are simply incomparable; in many
3017  // other instances we should only override the other boundary if something
3018  // is a clear good pick on one boundary. Skip heuristics that are more
3019  // "tie-breaking" in nature.
3020  bool SameBoundary = Zone != nullptr;
3021  if (SameBoundary) {
3022  // For loops that are acyclic path limited, aggressively schedule for
3023  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3024  // heuristics to take precedence.
3025  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3026  tryLatency(TryCand, Cand, *Zone))
3027  return;
3028 
3029  // Prioritize instructions that read unbuffered resources by stall cycles.
3030  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3031  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3032  return;
3033  }
3034 
3035  // Keep clustered nodes together to encourage downstream peephole
3036  // optimizations which may reduce resource requirements.
3037  //
3038  // This is a best effort to set things up for a post-RA pass. Optimizations
3039  // like generating loads of multiple registers should ideally be done within
3040  // the scheduler pass by combining the loads during DAG postprocessing.
3041  const SUnit *CandNextClusterSU =
3042  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3043  const SUnit *TryCandNextClusterSU =
3044  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3045  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3046  Cand.SU == CandNextClusterSU,
3047  TryCand, Cand, Cluster))
3048  return;
3049 
3050  if (SameBoundary) {
3051  // Weak edges are for clustering and other constraints.
3052  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3053  getWeakLeft(Cand.SU, Cand.AtTop),
3054  TryCand, Cand, Weak))
3055  return;
3056  }
3057 
3058  // Avoid increasing the max pressure of the entire region.
3059  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3060  Cand.RPDelta.CurrentMax,
3061  TryCand, Cand, RegMax, TRI,
3062  DAG->MF))
3063  return;
3064 
3065  if (SameBoundary) {
3066  // Avoid critical resource consumption and balance the schedule.
3067  TryCand.initResourceDelta(DAG, SchedModel);
3068  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3069  TryCand, Cand, ResourceReduce))
3070  return;
3071  if (tryGreater(TryCand.ResDelta.DemandedResources,
3072  Cand.ResDelta.DemandedResources,
3073  TryCand, Cand, ResourceDemand))
3074  return;
3075 
3076  // Avoid serializing long latency dependence chains.
3077  // For acyclic path limited loops, latency was already checked above.
3078  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3079  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3080  return;
3081 
3082  // Fall through to original instruction order.
3083  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3084  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3085  TryCand.Reason = NodeOrder;
3086  }
3087  }
3088 }
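// Summary (added for clarity, not in the original): the hierarchy above runs,
// from strongest to weakest: physreg bias, register-excess pressure,
// critical-set pressure, acyclic-latency scheduling, unbuffered-resource
// stalls, load/store clustering, weak edges, max-pressure, critical-resource
// reduction, demanded-resource balance, general latency, and finally the
// original instruction order as the tie breaker.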
3089 
3090 /// Pick the best candidate from the queue.
3091 ///
3092 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3093 /// DAG building. To adjust for the current scheduling location we need to
3094 /// maintain the number of vreg uses remaining to be top-scheduled.
3095 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3096  const CandPolicy &ZonePolicy,
3097  const RegPressureTracker &RPTracker,
3098  SchedCandidate &Cand) {
3099  // getMaxPressureDelta temporarily modifies the tracker.
3100  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3101 
3102  ReadyQueue &Q = Zone.Available;
3103  for (SUnit *SU : Q) {
3104 
3105  SchedCandidate TryCand(ZonePolicy);
3106  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3107  // Pass SchedBoundary only when comparing nodes from the same boundary.
3108  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3109  tryCandidate(Cand, TryCand, ZoneArg);
3110  if (TryCand.Reason != NoCand) {
3111  // Initialize resource delta if needed in case future heuristics query it.
3112  if (TryCand.ResDelta == SchedResourceDelta())
3113  TryCand.initResourceDelta(DAG, SchedModel);
3114  Cand.setBest(TryCand);
3115  LLVM_DEBUG(traceCandidate(Cand));
3116  }
3117  }
3118 }
3119 
3120 /// Pick the best candidate node from either the top or bottom queue.
3121 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3122  // Schedule as far as possible in the direction of no choice. This is most
3123  // efficient, but also provides the best heuristics for CriticalPSets.
3124  if (SUnit *SU = Bot.pickOnlyChoice()) {
3125  IsTopNode = false;
3126  tracePick(Only1, false);
3127  return SU;
3128  }
3129  if (SUnit *SU = Top.pickOnlyChoice()) {
3130  IsTopNode = true;
3131  tracePick(Only1, true);
3132  return SU;
3133  }
3134  // Set the bottom-up policy based on the state of the current bottom zone and
3135  // the instructions outside the zone, including the top zone.
3136  CandPolicy BotPolicy;
3137  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3138  // Set the top-down policy based on the state of the current top zone and
3139  // the instructions outside the zone, including the bottom zone.
3140  CandPolicy TopPolicy;
3141  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3142 
3143  // See if BotCand is still valid (because we previously scheduled from Top).
3144  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3145  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3146  BotCand.Policy != BotPolicy) {
3147  BotCand.reset(CandPolicy());
3148  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3149  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3150  } else {
3151  LLVM_DEBUG(traceCandidate(BotCand));
3152 #ifndef NDEBUG
3153  if (VerifyScheduling) {
3154  SchedCandidate TCand;
3155  TCand.reset(CandPolicy());
3156  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3157  assert(TCand.SU == BotCand.SU &&
3158  "Last pick result should correspond to re-picking right now");
3159  }
3160 #endif
3161  }
3162 
3163  // Check if the top Q has a better candidate.
3164  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3165  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3166  TopCand.Policy != TopPolicy) {
3167  TopCand.reset(CandPolicy());
3168  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3169  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3170  } else {
3171  LLVM_DEBUG(traceCandidate(TopCand));
3172 #ifndef NDEBUG
3173  if (VerifyScheduling) {
3174  SchedCandidate TCand;
3175  TCand.reset(CandPolicy());
3176  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3177  assert(TCand.SU == TopCand.SU &&
3178  "Last pick result should correspond to re-picking right now");
3179  }
3180 #endif
3181  }
3182 
3183  // Pick best from BotCand and TopCand.
3184  assert(BotCand.isValid());
3185  assert(TopCand.isValid());
3186  SchedCandidate Cand = BotCand;
3187  TopCand.Reason = NoCand;
3188  tryCandidate(Cand, TopCand, nullptr);
3189  if (TopCand.Reason != NoCand) {
3190  Cand.setBest(TopCand);
3191  LLVM_DEBUG(traceCandidate(Cand));
3192  }
3193 
3194  IsTopNode = Cand.AtTop;
3195  tracePick(Cand);
3196  return Cand.SU;
3197 }
3198 
3199 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3200 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3201  if (DAG->top() == DAG->bottom()) {
3202  assert(Top.Available.empty() && Top.Pending.empty() &&
3203  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3204  return nullptr;
3205  }
3206  SUnit *SU;
3207  do {
3208  if (RegionPolicy.OnlyTopDown) {
3209  SU = Top.pickOnlyChoice();
3210  if (!SU) {
3211  CandPolicy NoPolicy;
3212  TopCand.reset(NoPolicy);
3213  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3214  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3215  tracePick(TopCand);
3216  SU = TopCand.SU;
3217  }
3218  IsTopNode = true;
3219  } else if (RegionPolicy.OnlyBottomUp) {
3220  SU = Bot.pickOnlyChoice();
3221  if (!SU) {
3222  CandPolicy NoPolicy;
3223  BotCand.reset(NoPolicy);
3224  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3225  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3226  tracePick(BotCand);
3227  SU = BotCand.SU;
3228  }
3229  IsTopNode = false;
3230  } else {
3231  SU = pickNodeBidirectional(IsTopNode);
3232  }
3233  } while (SU->isScheduled);
3234 
3235  if (SU->isTopReady())
3236  Top.removeReady(SU);
3237  if (SU->isBottomReady())
3238  Bot.removeReady(SU);
3239 
3240  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3241  << *SU->getInstr());
3242  return SU;
3243 }
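// pickNode tolerates stale queue entries: a node can remain in one zone's
// ready list after being scheduled from the other direction, so the loop above
// keeps picking until it finds an unscheduled node and then drops it from the
// lists that still hold it. A standalone sketch of that shape, with a trivial
// Node type standing in for SUnit:
#include <vector>

namespace pickloop_sketch {

struct Node { int Id = 0; bool Scheduled = false; };

inline Node *pickNode(std::vector<Node *> &Ready) {
  Node *N = nullptr;
  do {
    if (Ready.empty())
      return nullptr;
    N = Ready.back();     // stand-in for the real heuristic-driven pick
    Ready.pop_back();
  } while (N->Scheduled); // skip entries scheduled from the other zone
  N->Scheduled = true;
  return N;
}

} // namespace pickloop_sketch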
3244 
3245 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3246  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3247  if (!isTop)
3248  ++InsertPos;
3249  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3250 
3251  // Find already scheduled copies with a single physreg dependence and move
3252  // them just above the scheduled instruction.
3253  for (SDep &Dep : Deps) {
3254  if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
3255  continue;
3256  SUnit *DepSU = Dep.getSUnit();
3257  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3258  continue;
3259  MachineInstr *Copy = DepSU->getInstr();
3260  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3261  continue;
3262  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3263  DAG->dumpNode(*Dep.getSUnit()));
3264  DAG->moveInstruction(Copy, InsertPos);
3265  }
3266 }
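// The essential operation in reschedulePhysReg is relocating one already
// emitted instruction so it sits right next to its single physreg use or def,
// shrinking the physreg live range. A standalone sketch of that relocation
// using std::list (the string "instructions" are placeholders, not MIs):
#include <list>
#include <string>

namespace physregcopy_sketch {

// Move the element at CopyIt so it immediately precedes InsertPos; splice
// re-links list nodes without copying, much as moveInstruction only changes
// the instruction's position within the block.
inline void moveBefore(std::list<std::string> &Block,
                       std::list<std::string>::iterator CopyIt,
                       std::list<std::string>::iterator InsertPos) {
  Block.splice(InsertPos, Block, CopyIt);
}

} // namespace physregcopy_sketch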
3267 
3268 /// Update the scheduler's state after scheduling a node. This is the same node
3269 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3270 /// update its state based on the current cycle before MachineSchedStrategy
3271 /// does.
3272 ///
3273 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3274 /// them here. See comments in biasPhysReg.
3275 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3276  if (IsTopNode) {
3277  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3278  Top.bumpNode(SU);
3279  if (SU->hasPhysRegUses)
3280  reschedulePhysReg(SU, true);
3281  } else {
3282  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3283  Bot.bumpNode(SU);
3284  if (SU->hasPhysRegDefs)
3285  reschedulePhysReg(SU, false);
3286  }
3287 }
3288 
3289 /// Create the standard converging machine scheduler. This will be used as the
3290 /// default scheduler if the target does not set a default.
3291 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3292  ScheduleDAGMILive *DAG =
3293  new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
3294  // Register DAG post-processors.
3295  //
3296  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3297  // data and pass it to later mutations. Have a single mutation that gathers
3298  // the interesting nodes in one pass.
3299  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3300  return DAG;
3301 }
3302 
3303 static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
3304  return createGenericSchedLive(C);
3305 }
3306 
3307 static MachineSchedRegistry
3308 GenericSchedRegistry("converge", "Standard converging scheduler.",
3309  createConveringSched);
3310 
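// The factory + MachineSchedRegistry pattern above is also how a target or
// out-of-tree strategy is typically exposed under -misched=<name>. A sketch of
// that usage; the "converge-cluster" name, the factory function, and the
// registry variable below are illustrative and not part of this file:
static ScheduleDAGInstrs *
createConvergingSchedWithClustering(MachineSchedContext *C) {
  // Reuse the generic strategy and bolt on the load-clustering DAG mutation.
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}
static MachineSchedRegistry
ClusterSchedRegistry("converge-cluster",
                     "Converging scheduler with load clustering (sketch).",
                     createConvergingSchedWithClustering);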
3311 //===----------------------------------------------------------------------===//
3312 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3313 //===----------------------------------------------------------------------===//
3314 
3315 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3316  DAG = Dag;
3317  SchedModel = DAG->getSchedModel();
3318  TRI = DAG->TRI;
3319 
3320  Rem.init(DAG, SchedModel);
3321  Top.init(DAG, SchedModel, &Rem);
3322  BotRoots.clear();
3323 
3324  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3325  // or are disabled, then these HazardRecs will be disabled.
3326  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3327  if (!Top.HazardRec) {
3328  Top.HazardRec =
3329  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3330  Itin, DAG);
3331  }
3332 }
3333 
3334 void PostGenericScheduler::registerRoots() {
3335  Rem.CriticalPath = DAG->ExitSU.getDepth();
3336 
3337  // Some roots may not feed into ExitSU. Check all of them in case.
3338  for (const SUnit *SU : BotRoots) {
3339  if (SU->getDepth() > Rem.CriticalPath)
3340  Rem.CriticalPath = SU->getDepth();
3341  }
3342  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3343  if (DumpCriticalPathLength) {
3344  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3345  }
3346 }
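// The critical path here is simply the deepest node among ExitSU and all
// bottom roots. A standalone reduction showing the same computation over plain
// depth values (illustrative numbers, not DAG data):
#include <algorithm>
#include <vector>

namespace critpath_sketch {

inline unsigned criticalPath(unsigned ExitDepth,
                             const std::vector<unsigned> &RootDepths) {
  unsigned CP = ExitDepth;
  for (unsigned D : RootDepths)
    CP = std::max(CP, D); // some roots may be deeper than the exit node
  return CP;
}

} // namespace critpath_sketch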
3347 
3348 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3349 ///
3350 /// \param Cand provides the policy and current best candidate.
3351 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3352 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3353  SchedCandidate &TryCand) {
3354  // Initialize the candidate if needed.
3355  if (!Cand.isValid()) {
3356  TryCand.Reason = NodeOrder;
3357  return;
3358  }
3359 
3360  // Prioritize instructions that read unbuffered resources by stall cycles.
3361  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3362  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3363  return;
3364 
3365  // Keep clustered nodes together.
3366  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3367  Cand.SU == DAG->getNextClusterSucc(),
3368  TryCand, Cand, Cluster))
3369  return;
3370 
3371  // Avoid critical resource consumption and balance the schedule.
3372  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3373  TryCand, Cand, ResourceReduce))
3374  return;
3375  if (tryGreater(TryCand.ResDelta.DemandedResources,
3376  Cand.ResDelta.DemandedResources,
3377  TryCand, Cand, ResourceDemand))
3378  return;
3379 
3380  // Avoid serializing long latency dependence chains.
3381  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3382  return;
3383  }
3384 
3385  // Fall through to original instruction order.
3386  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3387  TryCand.Reason = NodeOrder;
3388 }
3389 
3390 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3391  ReadyQueue &Q = Top.Available;
3392  for (SUnit *SU : Q) {
3393  SchedCandidate TryCand(Cand.Policy);
3394  TryCand.SU = SU;
3395  TryCand.AtTop = true;
3396  TryCand.initResourceDelta(DAG, SchedModel);
3397  tryCandidate(Cand, TryCand);
3398  if (TryCand.Reason != NoCand) {
3399  Cand.setBest(TryCand);
3400  LLVM_DEBUG(traceCandidate(Cand));
3401  }
3402  }
3403 }
3404 
3405 /// Pick the next node to schedule.
3406 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3407  if (DAG->top() == DAG->bottom()) {
3408  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3409  return nullptr;
3410  }
3411  SUnit *SU;
3412  do {
3413  SU = Top.pickOnlyChoice();
3414  if (SU) {
3415  tracePick(Only1, true);
3416  } else {
3417  CandPolicy NoPolicy;
3418  SchedCandidate TopCand(NoPolicy);
3419  // Set the top-down policy based on the state of the current top zone and
3420  // the instructions outside the zone, including the bottom zone.
3421  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3422  pickNodeFromQueue(TopCand);
3423  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3424  tracePick(TopCand);
3425  SU = TopCand.SU;
3426  }
3427  } while (SU->isScheduled);
3428 
3429  IsTopNode = true;
3430  Top.removeReady(SU);
3431 
3432  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3433  << *SU->getInstr());
3434  return SU;
3435 }
3436 
3437 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3438 /// scheduled/remaining flags in the DAG nodes.
3439 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3440  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3441  Top.bumpNode(SU);
3442 }
3443 
3444 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3445  return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
3446  /*RemoveKillFlags=*/true);
3447 }
3448 
3449 //===----------------------------------------------------------------------===//
3450 // ILP Scheduler. Currently for experimental analysis of heuristics.
3451 //===----------------------------------------------------------------------===//
3452 
3453 namespace {
3454 
3455 /// Order nodes by the ILP metric.
3456 struct ILPOrder {
3457  const SchedDFSResult *DFSResult = nullptr;
3458  const BitVector *ScheduledTrees = nullptr;
3459  bool MaximizeILP;
3460 
3461  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3462 
3463  /// Apply a less-than relation on node priority.
3464  ///
3465  /// (Return true if A comes after B in the Q.)
3466  bool operator()(const SUnit *A, const SUnit *B) const {
3467  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3468  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3469  if (SchedTreeA != SchedTreeB) {
3470  // Unscheduled trees have lower priority.
3471  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3472  return ScheduledTrees->test(SchedTreeB);
3473 
3474  // Trees with shallower connections have lower priority.
3475  if (DFSResult->getSubtreeLevel(SchedTreeA)
3476  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3477  return DFSResult->getSubtreeLevel(SchedTreeA)
3478  < DFSResult->getSubtreeLevel(SchedTreeB);
3479  }
3480  }
3481  if (MaximizeILP)
3482  return DFSResult->getILP(A) < DFSResult->getILP(B);
3483  else
3484  return DFSResult->getILP(A) > DFSResult->getILP(B);
3485  }
3486 };
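// ILPScheduler below keeps its ReadyQ as a std::make_heap/push_heap/pop_heap
// heap ordered by this comparator: operator() returns true when A should come
// after B, so pop_heap moves the current best node to the back of the vector,
// which pickNode then pops. A standalone demonstration of that heap protocol
// with plain ints (larger value standing in for higher priority):
#include <algorithm>
#include <cassert>
#include <vector>

namespace ilpheap_sketch {

inline int pickBest(std::vector<int> &ReadyQ) {
  std::pop_heap(ReadyQ.begin(), ReadyQ.end()); // best element moves to back()
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  return Best;
}

inline void demo() {
  std::vector<int> ReadyQ = {3, 9, 4, 1};
  std::make_heap(ReadyQ.begin(), ReadyQ.end());
  assert(pickBest(ReadyQ) == 9); // with less-than ordering, the max is picked
}

} // namespace ilpheap_sketch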
3487 
3488 /// Schedule based on the ILP metric.
3489 class ILPScheduler : public MachineSchedStrategy {
3490  ScheduleDAGMILive *DAG = nullptr;
3491  ILPOrder Cmp;
3492 
3493  std::vector<SUnit*> ReadyQ;
3494 
3495 public:
3496  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3497 
3498  void initialize(ScheduleDAGMI *dag) override {
3499  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3500  DAG = static_cast<ScheduleDAGMILive*>(dag);
3501  DAG->computeDFSResult();
3502  Cmp.DFSResult = DAG->getDFSResult();
3503  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3504  ReadyQ.clear();
3505  }
3506 
3507  void registerRoots() override {
3508  // Restore the heap in ReadyQ with the updated DFS results.
3509  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3510  }
3511 
3512  /// Implement MachineSchedStrategy interface.
3513  /// -----------------------------------------
3514 
3515  /// Callback to select the highest priority node from the ready Q.
3516  SUnit *pickNode(bool &IsTopNode) override {
3517  if (ReadyQ.empty()) return nullptr;
3518  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3519  SUnit *SU = ReadyQ.back();
3520  ReadyQ.pop_back();
3521  IsTopNode = false;
3522  LLVM_DEBUG(dbgs() << "Pick node "
3523  << "SU(" << SU->NodeNum << ") "
3524  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3525  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3526  << " @"
3527  << DAG->getDFSResult()->getSubtreeLevel(
3528  DAG->getDFSResult()->getSubtreeID(SU))
3529  << '\n'
3530  << "Scheduling " << *SU->getInstr());
3531  return SU;
3532  }
3533 
3534  /// Scheduler callback to notify that a new subtree is scheduled.
3535  void scheduleTree(unsigned SubtreeID) override {
3536  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3537  }
3538 
3539  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3540  /// DFSResults, and resort the priority Q.
3541  void schedNode(SUnit *SU, bool IsTopNode) override {
3542  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3543  }
3544 
3545  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3546 
3547  void releaseBottomNode(SUnit *SU) override {
3548  ReadyQ.push_back(SU);
3549  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3550  }
3551 };
3552 
3553 } // end anonymous namespace
3554 
3555 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3556  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
3557 }
3558 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3559  return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
3560 }
3561 
3562 static MachineSchedRegistry ILPMaxRegistry(
3563  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3564 static MachineSchedRegistry ILPMinRegistry(
3565  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3566 
3567 //===----------------------------------------------------------------------===//
3568 // Machine Instruction Shuffler for Correctness Testing
3569 //===----------------------------------------------------------------------===//
3570 
3571 #ifndef NDEBUG
3572 namespace {
3573 
3574 /// Apply a less-than relation on the node order, which corresponds to the
3575 /// instruction order prior to scheduling. IsReverse implements greater-than.
3576 template<bool IsReverse>
3577 struct SUnitOrder {
3578  bool operator()(SUnit *A, SUnit *B) const {
3579  if (IsReverse)
3580  return A->NodeNum > B->NodeNum;
3581  else
3582  return A->NodeNum < B->NodeNum;
3583  }
3584 };
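// As the comments inside InstructionShuffler below note, plugging a less-than
// comparator into a priority queue surfaces the highest node number (the
// latest instruction) first, while greater-than surfaces the lowest. A
// standalone check of that behaviour using std::priority_queue and plain
// node numbers:
#include <cassert>
#include <functional>
#include <queue>
#include <vector>

namespace shuffleorder_sketch {

inline void demo() {
  std::priority_queue<int, std::vector<int>, std::less<int>> TopQ;
  std::priority_queue<int, std::vector<int>, std::greater<int>> BottomQ;
  for (int N : {2, 7, 5}) {
    TopQ.push(N);
    BottomQ.push(N);
  }
  assert(TopQ.top() == 7);    // less-than: latest (highest-numbered) first
  assert(BottomQ.top() == 2); // greater-than: earliest (lowest-numbered) first
}

} // namespace shuffleorder_sketch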
3585 
3586 /// Reorder instructions as much as possible.
3587 class InstructionShuffler : public MachineSchedStrategy {
3588  bool IsAlternating;
3589  bool IsTopDown;
3590 
3591  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3592  // gives nodes with a higher number higher priority causing the latest
3593  // instructions to be scheduled first.
3594  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3595  TopQ;
3596 
3597  // When scheduling bottom-up, use greater-than as the queue priority.
3598  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3599  BottomQ;
3600 
3601 public:
3602  InstructionShuffler(bool alternate, bool topdown)
3603  : IsAlternating(alternate), IsTopDown(topdown) {}
3604 
3605  void initialize(ScheduleDAGMI*) override {
3606  TopQ.clear();
3607  BottomQ.clear();
3608  }
3609 
3610  /// Implement MachineSchedStrategy interface.
3611  /// -----------------------------------------
3612 
3613  SUnit *pickNode(bool &IsTopNode) override {
3614  SUnit *SU;
3615  if (IsTopDown) {
3616  do {
3617  if (TopQ.empty()) return nullptr;
3618  SU = TopQ.top();
3619  TopQ.pop();
3620  } while (SU->isScheduled);
3621  IsTopNode = true;
3622  } else {
3623  do {
3624  if (BottomQ.empty()) return nullptr;
3625  SU = BottomQ.top();
3626  BottomQ.pop();
3627  } while (SU->isScheduled);
3628  IsTopNode = false;
3629  }
3630  if (IsAlternating)
3631  IsTopDown = !IsTopDown;
3632  return SU;
3633  }
3634 
3635  void schedNode(SUnit *SU, bool IsTopNode) override {}
3636 
3637  void releaseTopNode(SUnit *SU) override {
3638  TopQ.push(SU);
3639  }
3640  void releaseBottomNode(SUnit *SU) override {
3641  BottomQ.push(SU);
3642  }
3643 };
3644 
3645 } // end anonymous namespace
3646 
3647 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3648  bool Alternate = !ForceTopDown && !ForceBottomUp;
3649  bool TopDown = !ForceBottomUp;
3650  assert((TopDown || !ForceTopDown) &&
3651  "-misched-topdown incompatible with -misched-bottomup");
3652  return new ScheduleDAGMILive(
3653  C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
3654 }
3655 
3656 static MachineSchedRegistry ShufflerRegistry(
3657  "shuffle", "Shuffle machine instructions alternating directions",
3658  createInstructionShuffler);
3659 #endif // !NDEBUG
3660 
3661 //===----------------------------------------------------------------------===//
3662 // GraphWriter support for ScheduleDAGMILive.
3663 //===----------------------------------------------------------------------===//
3664 
3665 #ifndef NDEBUG
3666 namespace llvm {
3667 
3668 template<> struct GraphTraits<
3669  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3670 
3671 template<>
3672 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3673  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3674 
3675  static std::string getGraphName(const ScheduleDAG *G) {
3676  return G->MF.getName();
3677  }
3678 
3679  static bool renderGraphFromBottomUp() {
3680  return true;
3681  }
3682 
3683  static bool isNodeHidden(const SUnit *Node) {
3684  if (ViewMISchedCutoff == 0)
3685  return false;
3686  return (Node->Preds.size() > ViewMISchedCutoff
3687  || Node->Succs.size() > ViewMISchedCutoff);
3688  }
3689 
3690  /// If you want to override the dot attributes printed for a particular
3691  /// edge, override this method.
3692  static std::string getEdgeAttributes(const SUnit *Node,
3693  SUnitIterator EI,
3694  const ScheduleDAG *Graph) {
3695  if (EI.isArtificialDep())
3696  return "color=cyan,style=dashed";
3697  if (EI.isCtrlDep())
3698  return "color=blue,style=dashed";
3699  return "";
3700  }
3701 
3702  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3703  std::string Str;
3704  raw_string_ostream SS(Str);
3705  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3706  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3707  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3708  SS << "SU:" << SU->NodeNum;
3709  if (DFS)
3710  SS << " I:" << DFS->getNumInstrs(SU);
3711  return SS.str();
3712  }
3713 
3714  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3715  return G->getGraphNodeLabel(SU);
3716  }
3717 
3718  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3719  std::string Str("shape=Mrecord");
3720  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3721  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3722  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3723  if (DFS) {
3724  Str += ",style=filled,fillcolor=\"#";
3725  Str += DOT::getColorString(DFS->getSubtreeID(N));
3726  Str += '"';
3727  }
3728  return Str;
3729  }
3730 };
3731 
3732 } // end namespace llvm
3733 #endif // NDEBUG
3734 
3735 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3736 /// rendered using 'dot'.
3737 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3738 #ifndef NDEBUG
3739  ViewGraph(this, Name, false, Title);
3740 #else
3741  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3742  << "systems with Graphviz or gv!\n";
3743 #endif // NDEBUG
3744 }
3745 
3746 /// Out-of-line implementation with no arguments is handy for gdb.
3747 void ScheduleDAGMI::viewGraph() {
3748  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3749 }
uint64_t CallInst * C
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
std::reverse_iterator< const_iterator > const_reverse_iterator
Definition: SmallVector.h:118
unsigned findMaxLatency(ArrayRef< SUnit *> ReadySUs)
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
int biasPhysReg(const SUnit *SU, bool isTop)
Minimize physical register live ranges.
Weak DAG edge linking a chain of clustered instrs.
Definition: ScheduleDAG.h:74
bool isPHIDef() const
Returns true if this value is defined by a PHI instruction (or was, PHI instructions may have been el...
Definition: LiveInterval.h:77
unsigned getNumInstrs(const SUnit *SU) const
Get the number of instructions in the given subtree and its children.
Definition: ScheduleDFS.h:145
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the ...
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
virtual void finishBlock()
Cleans up after scheduling in the given block.
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
A common definition of LaneBitmask for use in TableGen and CodeGen.
unsigned getZoneCritResIdx() const
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
LLVMContext & Context
static MachinePassRegistry< ScheduleDAGCtor > Registry
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
void dumpSchedule() const
dump the scheduled Sequence.
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:320
Each Scheduling boundary is associated with ready queues.
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:60
This class represents lattice values for constants.
Definition: AllocatorList.h:23
void addPressureChange(unsigned RegUnit, bool IsDec, const MachineRegisterInfo *MRI)
Add a change in pressure to the pressure diff of a given instruction.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:473
SUnit * pickNodeBidirectional(bool &IsTopNode)
Pick the best candidate node from either the top or bottom queue.
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling...
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Segments::iterator iterator
Definition: LiveInterval.h:207
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
void push_back(const T &Elt)
Definition: SmallVector.h:211
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
char & MachineDominatorsID
MachineDominators - This pass is a machine dominators analysis pass.
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:637
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
static cl::opt< std::string > SchedOnlyFunc("misched-only-func", cl::Hidden, cl::desc("Only schedule this function"))
unsigned getReg() const
getReg - Returns the register number.
unsigned computeCyclicCriticalPath()
Compute the cyclic critical path through the DAG.
void dump() const override
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
#define DEBUG_TYPE
unsigned Reg
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
virtual const TargetLowering * getTargetLowering() const
void traceCandidate(const SchedCandidate &Cand)
reverse_iterator rbegin() const
Definition: ArrayRef.h:139
bool test(unsigned Idx) const
Definition: BitVector.h:501
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Mutate the DAG as a postpass after normal DAG building.
const IntervalPressure & getRegPressure() const
Get register pressure for the entire scheduling region before scheduling.
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
virtual std::string getGraphNodeLabel(const SUnit *SU) const =0
Returns a label for an SUnit node in a visualization of the ScheduleDAG.
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
unsigned getDependentLatency() const
static ScheduleDAGInstrs * createInstructionShuffler(MachineSchedContext *C)
unsigned const TargetRegisterInfo * TRI
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
Summarize the unscheduled region.
void reset(const CandPolicy &NewPolicy)
const SchedDFSResult * getDFSResult() const
Return a non-null DFS result if the scheduling strategy initialized it.
RegisterPassParser class - Handle the addition of new machine passes.
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
MachineSchedRegistry provides a selection of available machine instruction schedulers.
bool isBottomReady() const
Definition: ScheduleDAG.h:449
VNInfo - Value Number Information.
Definition: LiveInterval.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
static cl::opt< bool > PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs"))
bool isMoveImmediate(QueryType Type=IgnoreBundle) const
Return true if this instruction is a move immediate (including conditional moves) instruction...
Definition: MachineInstr.h:699
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
*ViewGraph Emit a dot run run gv on the postscript *then cleanup For use from the debugger *void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
Definition: GraphWriter.h:366
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
ArrayRef< SUnit * > elements()
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
const RegPressureTracker & getBotRPTracker() const
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
unsigned NumSuccsLeft
of succs not scheduled.
Definition: ScheduleDAG.h:269
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Provide an instruction scheduling machine model to CodeGen passes.
const HexagonInstrInfo * TII
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method...
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
An individual mapping from virtual register number to SUnit.
Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static cl::opt< MachineSchedRegistry::ScheduleDAGCtor, false, RegisterPassParser< MachineSchedRegistry > > MachineSchedOpt("misched", cl::init(&useDefaultMachineSched), cl::Hidden, cl::desc("Machine instruction scheduler to use"))
MachineSchedOpt allows command line selection of the scheduler.
iterator end()
Definition: LiveInterval.h:211
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
Result of a LiveRange query.
Definition: LiveInterval.h:89
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
StringRef getName() const
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
PressureDiff & getPressureDiff(const SUnit *SU)
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model. ...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Target-Independent Code Generator Pass Configuration Options.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
static const char * getReasonStr(SIScheduleCandReason Reason)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
static const unsigned InvalidCycle
unsigned getNumMicroOps(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return the number of issue slots required for this MI.
static cl::opt< bool > VerifyScheduling("verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling"))
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register. ...
void bumpCycle(unsigned NextCycle)
Move the boundary of scheduled code by one cycle.
unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
SlotIndexes pass.
Definition: SlotIndexes.h:328
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def...
Definition: SlotIndexes.h:254
MachineBasicBlock::iterator top() const
void buildDAGWithRegPressure()
Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking enabled.
unsigned WeakSuccsLeft
of weak succs not scheduled.
Definition: ScheduleDAG.h:271
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
static const char * getReasonStr(GenericSchedulerBase::CandReason Reason)
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
static std::string getGraphName(const ScheduleDAG *G)
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
Itinerary data supplied by a subtarget to be used by a target.
unsigned NumPredsLeft
of preds not scheduled.
Definition: ScheduleDAG.h:268
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
bool isTopReady() const
Definition: ScheduleDAG.h:446
void releasePending()
Release pending ready nodes in to the available queue.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
MachinePassRegistry - Track the registration of machine passes.
List of registers defined and used by a machine instruction.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
std::vector< SUnit * >::iterator iterator
void collectVRegUses(SUnit &SU)
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
const SUnit * getNextClusterSucc() const
void bumpNode(SUnit *SU)
Move the boundary of scheduled code by one SUnit.
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule.
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
void checkAcyclicLatency()
Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic critical path by more cycle...
static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G)
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:104
void incExecutedResources(unsigned PIdx, unsigned Count)
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:528
TargetInstrInfo - Interface to description of machine instruction set.
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Scheduling dependency.
Definition: ScheduleDAG.h:49
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:819
#define P(N)
unsigned getPSetOrMax() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
SlotIndex endIndex() const
endNumber - return the maximum point of the range of the whole, exclusive.
Definition: LiveInterval.h:379
CandReason
Represent the type of SchedCandidate found within a single queue.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
Helpers for implementing custom MachineSchedStrategy classes.
unsigned const MachineRegisterInfo * MRI
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
static const unsigned MinSubtreeSize
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
void removeReady(SUnit *SU)
Remove SU from the ready set for this boundary.
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
RegisterPressure computed within a region of instructions delimited by TopPos and BottomPos...
int getNumOccurrences() const
Definition: CommandLine.h:393
virtual bool doMBBSchedRegionsTopDown() const
If this method returns true, handling of the scheduling regions themselves (in case of a scheduling b...
Represent the analysis usage information of a pass.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
bool checkHazard(SUnit *SU)
Does this SU have a hazard within the current instruction group.
iterator_range< mop_iterator > defs()
Returns a range over all explicit operands that are register definitions.
Definition: MachineInstr.h:480
void releasePred(SUnit *SU, SDep *PredEdge)
ReleasePred - Decrement the NumSuccsLeft count of a predecessor.
Track the current register pressure at some position in the instruction stream, and remember the high...
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
Policy for scheduling the next instruction in the candidate&#39;s zone.
static ScheduleDAGInstrs * useDefaultMachineSched(MachineSchedContext *C)
A dummy default scheduler factory indicates whether the scheduler is overridden on the command line...
const TargetSchedModel * getSchedModel() const
Gets the machine model for instruction scheduling.
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
unsigned getScheduledLatency() const
Get the number of latency cycles "covered" by the scheduled instructions.
List of PressureChanges in order of increasing, unique PSetID.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleDAGInstrs *(*)(MachineSchedContext *) ScheduleDAGCtor
std::unique_ptr< ScheduleDAGMutation > createStoreClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction...
void initQueues(ArrayRef< SUnit *> TopRoots, ArrayRef< SUnit *> BotRoots)
Release ExitSU predecessors and setup scheduler queues.
bool isCopy() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
unsigned getWeakLeft(const SUnit *SU, bool isTop)
size_t size() const
Definition: SmallVector.h:52
bool isDebugInstr() const
Definition: MachineInstr.h:998
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU&#39;s successors.
void print(raw_ostream &OS, const SlotIndexes *=nullptr) const
print - Print out the MachineFunction in a format suitable for debugging to the specified stream...
std::string & str()
Flushes the stream contents to the target string and returns the string&#39;s reference.
Definition: raw_ostream.h:498
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
virtual bool enableMachineScheduler() const
True if the subtarget should run MachineScheduler after aggressive coalescing.
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
unsigned WeakPredsLeft
of weak preds not scheduled.
Definition: ScheduleDAG.h:270
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles)
Compute the next cycle at which the given processor resource unit can be scheduled.
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to &#39;dot...
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
Iterator for intrusive lists based on ilist_node.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:314
MachineOperand class - Representation of each machine instruction operand.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
MachineInstrBuilder MachineInstrBuilder & DefMI
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
Information about stack frame layout on the target.
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
cl::opt< bool > DumpCriticalPathLength("misched-dcpl", cl::Hidden, cl::desc("Print critical path length to stdout"))
void releaseNode(SUnit *SU, unsigned ReadyCycle)
unsigned getMicroOpFactor() const
Multiply number of micro-ops by this factor to normalize it relative to other resources.
CHAIN = SC CHAIN, Imm128 - System call.
const RegPressureTracker & getTopRPTracker() const
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
bool isResourceLimited() const
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
LiveInterval & getInterval(unsigned Reg)
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
static cl::opt< bool > EnableCyclicPath("misched-cyclicpath", cl::Hidden, cl::desc("Enable cyclic critical path analysis."), cl::init(true))
void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
INITIALIZE_PASS(PostMachineScheduler, "postmisched", "PostRA Machine Instruction Scheduler", false, false) PostMachineScheduler
const Function & getFunction() const
Return the LLVM function that this machine code represents.
void initializeMachineSchedulerPass(PassRegistry &)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned getOtherResourceCount(unsigned &OtherCritIdx)
void dumpPolicy() const override
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::pair< unsigned, unsigned > getNextResourceCycle(unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarilly including Idx...
Definition: LiveInterval.h:416
virtual void finalizeSchedule()
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
PressureChange CriticalMax
virtual void schedule()=0
Orders nodes according to selected style.
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
unsigned getNumProcResourceKinds() const
Get the number of kinds of resources for this target.
void dumpNode(const SUnit &SU) const override
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
void schedNode(SUnit *SU, bool IsTopNode) override
Update the scheduler&#39;s state after scheduling a node.
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU&#39;s predecessors.
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
virtual SUnit * pickNode(bool &IsTopNode)=0
Pick the next node to schedule, or return NULL.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void reschedulePhysReg(SUnit *SU, bool isTop)
A ScheduleDAG for scheduling lists of MachineInstr.
reverse_iterator rend() const
Definition: ArrayRef.h:140
Representation of each machine instruction.
Definition: MachineInstr.h:63
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:564
void initializePostMachineSchedulerPass(PassRegistry &)
void dump(const TargetRegisterInfo &TRI) const
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
SUnit * pickOnlyChoice()
Call this before applying any other heuristics to the Available queue.
void findRootsAndBiasEdges(SmallVectorImpl< SUnit *> &TopRoots, SmallVectorImpl< SUnit *> &BotRoots)
void dumpRegSetPressure(ArrayRef< unsigned > SetPressure, const TargetRegisterInfo *TRI)
Status of an instruction&#39;s critical resource consumption.
~ScheduleDAGMI() override
LiveIntervals * getLIS() const
char & PostMachineSchedulerID
PostMachineScheduler - This pass schedules machine instructions postRA.
cl::opt< bool > ForceBottomUp
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use...
void dumpScheduledState() const
MachineBasicBlock::iterator bottom() const
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Capture a change in pressure for a single pressure set.
virtual const TargetFrameLowering * getFrameLowering() const
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
const SUnit * getNextClusterPred() const
static ScheduleDAGInstrs * createConveringSched(MachineSchedContext *C)
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph...
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit&#39;s ready time and the current cycle.
void schedule() override
Implement ScheduleDAGInstrs interface for scheduling a sequence of reorderable instructions.
SUnit * pickNode(bool &IsTopNode) override
Pick the best node to balance the schedule. Implements MachineSchedStrategy.
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Candidate)
Pick the best candidate from the queue.
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
static bool isNodeHidden(const SUnit *Node)
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
static MachineSchedRegistry GenericSchedRegistry("converge", "Standard converging scheduler.", createConveringSched)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
virtual bool enablePostRAScheduler() const
True if the subtarget should run a scheduler after register allocation.
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists...
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
const std::vector< PressureChange > & getRegionCriticalPSets() const
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:652
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
iterator begin()
Definition: LiveInterval.h:210
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:806
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
bool isTrackingPressure() const
Return true if register pressure tracking is enabled.
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:372
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:505
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource limited.
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
nonconst_iterator getNonConstIterator() const
virtual ScheduleHazardRecognizer * CreateTargetMIHazardRecognizer(const InstrItineraryData *, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructions.
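A target override usually just instantiates a recognizer seeded with its itineraries; this sketch mirrors the default behavior and uses an invented MyTargetInstrInfo (it assumes llvm/CodeGen/ScoreboardHazardRecognizer.h is available):
ScheduleHazardRecognizer *MyTargetInstrInfo::CreateTargetMIHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
  // Scoreboard-based recognizer driven by the itinerary data.
  return new ScoreboardHazardRecognizer(II, DAG, "misched");
}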
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool getMemOperandWithOffset(const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const override
Get the base register and byte offset of a load/store instr.
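Roughly how the memop-clustering mutation consumes this hook (MemMI, TII, and TRI are placeholders for the caller's state):
const MachineOperand *BaseOp = nullptr;
int64_t Offset = 0;
if (TII->getMemOperandWithOffset(MemMI, BaseOp, Offset, TRI)) {
  // Loads/stores sharing BaseOp with nearby Offsets become candidates for a
  // weak "cluster" edge so they are scheduled back to back.
}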
bool isCluster() const
Tests if this is an Order dependence that is marked as "cluster", meaning it is artificial and wants to be adjacent.
Definition: ScheduleDAG.h:206
static const Function * getParent(const Value *V)
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
bool isWeak() const
Tests if this a weak dependence.
Definition: ScheduleDAG.h:194
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:45
DefaultDOTGraphTraits - This class provides the default implementations of all of the DOTGraphTraits methods.
Machine Instruction Scheduler
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false)
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
void finishBlock() override
Cleans up after scheduling in the given block.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
virtual unsigned getRegPressureSetScore(const MachineFunction &MF, unsigned PSetID) const
Return a heuristic for the machine scheduler to compare the profitability of increasing one register pressure set over another.
void pickNodeFromQueue(SchedCandidate &Cand)
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
This file describes how to lower LLVM code to machine code.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
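A common pattern (sketched here with an invented MyTargetPassConfig) is for a target's createMachineScheduler to attach mutations such as the in-tree load-clustering one:
ScheduleDAGInstrs *
MyTargetPassConfig::createMachineScheduler(MachineSchedContext *C) const {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
  // Cluster adjacent loads before the scheduling heuristics run.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}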
cl::opt< bool > ForceTopDown
static unsigned computeRemLatency(SchedBoundary &CurrZone)
Compute remaining latency.
ILPValue getILP(const SUnit *SU) const
Get the ILP value for a DAG node.
Definition: ScheduleDFS.h:158
static cl::opt< bool > EnableMemOpCluster("misched-cluster", cl::Hidden, cl::desc("Enable memop clustering."), cl::init(true))
bool isArtificialDep() const
Definition: ScheduleDAG.h:655
void resize(size_type N)
Definition: SmallVector.h:344