MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
35 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/Config/llvm-config.h"
52 #include "llvm/InitializePasses.h"
53 #include "llvm/MC/LaneBitmask.h"
54 #include "llvm/Pass.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <limits>
67 #include <memory>
68 #include <string>
69 #include <tuple>
70 #include <utility>
71 #include <vector>
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "machine-scheduler"
76 
77 STATISTIC(NumClustered, "Number of load/store pairs clustered");
78 
79 namespace llvm {
80 
81 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
82  cl::desc("Force top-down list scheduling"));
83 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
84  cl::desc("Force bottom-up list scheduling"));
85 cl::opt<bool>
86 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
87  cl::desc("Print critical path length to stdout"));
88 
90  "verify-misched", cl::Hidden,
91  cl::desc("Verify machine instrs before and after machine scheduling"));
92 
93 } // end namespace llvm
94 
95 #ifndef NDEBUG
96 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
97  cl::desc("Pop up a window to show MISched dags after they are processed"));
98 
99 /// In some situations a few uninteresting nodes depend on nearly all other
100 /// nodes in the graph; provide a cutoff to hide them.
101 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
102  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
103 
104 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
105  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
106 
107 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
108  cl::desc("Only schedule this function"));
109 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
110  cl::desc("Only schedule this MBB#"));
111 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
112  cl::desc("Print schedule DAGs"));
113 #else
114 static const bool ViewMISchedDAGs = false;
115 static const bool PrintDAGs = false;
116 #endif // NDEBUG
117 
118 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
119 /// size of the ready lists.
120 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
121  cl::desc("Limit ready list to N instructions"), cl::init(256));
122 
123 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
124  cl::desc("Enable register pressure scheduling."), cl::init(true));
125 
126 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
127  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
128 
129 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
130  cl::desc("Enable memop clustering."),
131  cl::init(true));
132 static cl::opt<bool>
133  ForceFastCluster("force-fast-cluster", cl::Hidden,
134  cl::desc("Switch to fast cluster algorithm with the lost "
135  "of some fusion opportunities"),
136  cl::init(false));
137 static cl::opt<unsigned>
138  FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
139  cl::desc("The threshold for fast cluster"),
140  cl::init(1000));
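// A minimal usage sketch (illustrative, not from this file): these cl::opt
// flags are read when the pass runs, so they can be passed straight to llc,
// or to clang via -mllvm; the values below are purely illustrative:
//
//   llc -misched-topdown -misched-limit=128 -misched-cluster=false foo.ll
//   clang -O2 -mllvm -misched-print-dags foo.c
//
// Options declared under "#ifndef NDEBUG" above (e.g. -misched-print-dags)
// are only available in assertion-enabled builds.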
141 
142 // DAG subtrees must have at least this many nodes.
143 static const unsigned MinSubtreeSize = 8;
144 
145 // Pin the vtables to this file.
146 void MachineSchedStrategy::anchor() {}
147 
148 void ScheduleDAGMutation::anchor() {}
149 
150 //===----------------------------------------------------------------------===//
151 // Machine Instruction Scheduling Pass and Registry
152 //===----------------------------------------------------------------------===//
153 
154 MachineSchedContext::MachineSchedContext() {
155  RegClassInfo = new RegisterClassInfo();
156 }
157 
158 MachineSchedContext::~MachineSchedContext() {
159  delete RegClassInfo;
160 }
161 
162 namespace {
163 
164 /// Base class for a machine scheduler pass that can run at any point.
165 class MachineSchedulerBase : public MachineSchedContext,
166  public MachineFunctionPass {
167 public:
168  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
169 
170  void print(raw_ostream &O, const Module* = nullptr) const override;
171 
172 protected:
173  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
174 };
175 
176 /// MachineScheduler runs after coalescing and before register allocation.
177 class MachineScheduler : public MachineSchedulerBase {
178 public:
179  MachineScheduler();
180 
181  void getAnalysisUsage(AnalysisUsage &AU) const override;
182 
183  bool runOnMachineFunction(MachineFunction&) override;
184 
185  static char ID; // Class identification, replacement for typeinfo
186 
187 protected:
188  ScheduleDAGInstrs *createMachineScheduler();
189 };
190 
191 /// PostMachineScheduler runs shortly before code emission.
192 class PostMachineScheduler : public MachineSchedulerBase {
193 public:
194  PostMachineScheduler();
195 
196  void getAnalysisUsage(AnalysisUsage &AU) const override;
197 
198  bool runOnMachineFunction(MachineFunction&) override;
199 
200  static char ID; // Class identification, replacement for typeinfo
201 
202 protected:
203  ScheduleDAGInstrs *createPostMachineScheduler();
204 };
205 
206 } // end anonymous namespace
207 
208 char MachineScheduler::ID = 0;
209 
211 
212 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
213  "Machine Instruction Scheduler", false, false)
219 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
221 
222 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
224 }
225 
226 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
227  AU.setPreservesCFG();
232  AU.addRequired<SlotIndexes>();
237 }
238 
239 char PostMachineScheduler::ID = 0;
240 
242 
243 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
244  "PostRA Machine Instruction Scheduler", false, false)
248 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
250 
251 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
253 }
254 
255 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
256  AU.setPreservesCFG();
262 }
263 
266 
267 /// A dummy default scheduler factory indicates whether the scheduler
268 /// is overridden on the command line.
270  return nullptr;
271 }
272 
273 /// MachineSchedOpt allows command line selection of the scheduler.
276 MachineSchedOpt("misched",
278  cl::desc("Machine instruction scheduler to use"));
279 
281 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
283 
285  "enable-misched",
286  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
287  cl::Hidden);
288 
290  "enable-post-misched",
291  cl::desc("Enable the post-ra machine instruction scheduling pass."),
292  cl::init(true), cl::Hidden);
293 
294 /// Decrement this iterator until reaching the top or a non-debug instr.
298  assert(I != Beg && "reached the top of the region, cannot decrement");
299  while (--I != Beg) {
300  if (!I->isDebugOrPseudoInstr())
301  break;
302  }
303  return I;
304 }
305 
306 /// Non-const version.
312 }
313 
314 /// If this iterator is a debug value, increment until reaching the End or a
315 /// non-debug instruction.
319  for(; I != End; ++I) {
320  if (!I->isDebugOrPseudoInstr())
321  break;
322  }
323  return I;
324 }
325 
326 /// Non-const version.
332 }
333 
334 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
335 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
336  // Select the scheduler, or set the default.
338  if (Ctor != useDefaultMachineSched)
339  return Ctor(this);
340 
341  // Get the default scheduler set by the target for this function.
342  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
343  if (Scheduler)
344  return Scheduler;
345 
346  // Default to GenericScheduler.
347  return createGenericSchedLive(this);
348 }
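// A minimal sketch of how an out-of-tree strategy could hook into the
// selection above. The names MySchedStrategy, createMySched and "my-sched"
// are hypothetical; MySchedStrategy is assumed to implement
// MachineSchedStrategy:
//
//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<MySchedStrategy>());
//   }
//   static MachineSchedRegistry MySchedRegistry("my-sched",
//                                               "Custom strategy",
//                                               createMySched);
//
// Once registered, "-misched=my-sched" takes precedence over both the target
// default and GenericScheduler in createMachineScheduler() above.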
349 
350 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
351 /// the caller. We don't have a command line option to override the postRA
352 /// scheduler. The Target must configure it.
353 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
354  // Get the postRA scheduler set by the target for this function.
355  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
356  if (Scheduler)
357  return Scheduler;
358 
359  // Default to GenericScheduler.
360  return createGenericSchedPostRA(this);
361 }
362 
363 /// Top-level MachineScheduler pass driver.
364 ///
365 /// Visit blocks in function order. Divide each block into scheduling regions
366 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
367 /// consistent with the DAG builder, which traverses the interior of the
368 /// scheduling regions bottom-up.
369 ///
370 /// This design avoids exposing scheduling boundaries to the DAG builder,
371 /// simplifying the DAG builder's support for "special" target instructions.
372 /// At the same time the design allows target schedulers to operate across
373 /// scheduling boundaries, for example to bundle the boundary instructions
374 /// without reordering them. This creates complexity, because the target
375 /// scheduler must update the RegionBegin and RegionEnd positions cached by
376 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
377 /// design would be to split blocks at scheduling boundaries, but LLVM has a
378 /// general bias against block splitting purely for implementation simplicity.
379 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
380  if (skipFunction(mf.getFunction()))
381  return false;
382 
384  if (!EnableMachineSched)
385  return false;
386  } else if (!mf.getSubtarget().enableMachineScheduler())
387  return false;
388 
389  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
390 
391  // Initialize the context of the pass.
392  MF = &mf;
393  MLI = &getAnalysis<MachineLoopInfo>();
394  MDT = &getAnalysis<MachineDominatorTree>();
395  PassConfig = &getAnalysis<TargetPassConfig>();
396  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
397 
398  LIS = &getAnalysis<LiveIntervals>();
399 
400  if (VerifyScheduling) {
401  LLVM_DEBUG(LIS->dump());
402  MF->verify(this, "Before machine scheduling.");
403  }
404  RegClassInfo->runOnMachineFunction(*MF);
405 
406  // Instantiate the selected scheduler for this target, function, and
407  // optimization level.
408  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
409  scheduleRegions(*Scheduler, false);
410 
411  LLVM_DEBUG(LIS->dump());
412  if (VerifyScheduling)
413  MF->verify(this, "After machine scheduling.");
414  return true;
415 }
416 
417 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
418  if (skipFunction(mf.getFunction()))
419  return false;
420 
423  return false;
424  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
425  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
426  return false;
427  }
428  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
429 
430  // Initialize the context of the pass.
431  MF = &mf;
432  MLI = &getAnalysis<MachineLoopInfo>();
433  PassConfig = &getAnalysis<TargetPassConfig>();
434  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
435 
436  if (VerifyScheduling)
437  MF->verify(this, "Before post machine scheduling.");
438 
439  // Instantiate the selected scheduler for this target, function, and
440  // optimization level.
441  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
442  scheduleRegions(*Scheduler, true);
443 
444  if (VerifyScheduling)
445  MF->verify(this, "After post machine scheduling.");
446  return true;
447 }
448 
449 /// Return true if the given instruction should not be included in a scheduling
450 /// region.
451 ///
452 /// MachineScheduler does not currently support scheduling across calls. To
453 /// handle calls, the DAG builder needs to be modified to create register
454 /// anti/output dependencies on the registers clobbered by the call's regmask
455 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
456 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
457 /// the boundary, but there would be no benefit to postRA scheduling across
458 /// calls this late anyway.
461  MachineFunction *MF,
462  const TargetInstrInfo *TII) {
463  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
464 }
465 
466 /// A region of an MBB for scheduling.
467 namespace {
468 struct SchedRegion {
469  /// RegionBegin is the first instruction in the scheduling region, and
470  /// RegionEnd is either MBB->end() or the scheduling boundary after the
471  /// last instruction in the scheduling region. These iterators cannot refer
472  /// to instructions outside of the identified scheduling region because
473  /// those may be reordered before scheduling this region.
474  MachineBasicBlock::iterator RegionBegin;
475  MachineBasicBlock::iterator RegionEnd;
476  unsigned NumRegionInstrs;
477 
479  unsigned N) :
480  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
481 };
482 } // end anonymous namespace
483 
485 
486 static void
488  MBBRegionsVector &Regions,
489  bool RegionsTopDown) {
490  MachineFunction *MF = MBB->getParent();
491  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
492 
493  MachineBasicBlock::iterator I = nullptr;
494  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
495  RegionEnd != MBB->begin(); RegionEnd = I) {
496 
497  // Avoid decrementing RegionEnd for blocks with no terminator.
498  if (RegionEnd != MBB->end() ||
499  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
500  --RegionEnd;
501  }
502 
503  // The next region starts above the previous region. Look backward in the
504  // instruction stream until we find the nearest boundary.
505  unsigned NumRegionInstrs = 0;
506  I = RegionEnd;
507  for (;I != MBB->begin(); --I) {
508  MachineInstr &MI = *std::prev(I);
509  if (isSchedBoundary(&MI, &*MBB, MF, TII))
510  break;
511  if (!MI.isDebugOrPseudoInstr()) {
512  // MBB::size() uses instr_iterator to count. Here we need a bundle to
513  // count as a single instruction.
514  ++NumRegionInstrs;
515  }
516  }
517 
518  // It's possible we found a scheduling region that only has debug
519  // instructions. Don't bother scheduling these.
520  if (NumRegionInstrs != 0)
521  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
522  }
523 
524  if (RegionsTopDown)
525  std::reverse(Regions.begin(), Regions.end());
526 }
527 
528 /// Main driver for both MachineScheduler and PostMachineScheduler.
529 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
530  bool FixKillFlags) {
531  // Visit all machine basic blocks.
532  //
533  // TODO: Visit blocks in global postorder or postorder within the bottom-up
534  // loop tree. Then we can optionally compute global RegPressure.
535  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
536  MBB != MBBEnd; ++MBB) {
537 
538  Scheduler.startBlock(&*MBB);
539 
540 #ifndef NDEBUG
541  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
542  continue;
543  if (SchedOnlyBlock.getNumOccurrences()
544  && (int)SchedOnlyBlock != MBB->getNumber())
545  continue;
546 #endif
547 
548  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
549  // points to the scheduling boundary at the bottom of the region. The DAG
550  // does not include RegionEnd, but the region does (i.e. the next
551  // RegionEnd is above the previous RegionBegin). If the current block has
552  // no terminator then RegionEnd == MBB->end() for the bottom region.
553  //
554  // All the regions of MBB are first found and stored in MBBRegions, which
555  // are processed top-down within the MBB if doMBBSchedRegionsTopDown() is true.
556  //
557  // The Scheduler may insert instructions during either schedule() or
558  // exitRegion(), even for empty regions. So the local iterators 'I' and
559  // 'RegionEnd' are invalid across these calls. Instructions must not be
560  // added to other regions than the current one without updating MBBRegions.
561 
562  MBBRegionsVector MBBRegions;
563  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
564  for (MBBRegionsVector::iterator R = MBBRegions.begin();
565  R != MBBRegions.end(); ++R) {
566  MachineBasicBlock::iterator I = R->RegionBegin;
567  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
568  unsigned NumRegionInstrs = R->NumRegionInstrs;
569 
570  // Notify the scheduler of the region, even if we may skip scheduling
571  // it. Perhaps it still needs to be bundled.
572  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
573 
574  // Skip empty scheduling regions (0 or 1 schedulable instructions).
575  if (I == RegionEnd || I == std::prev(RegionEnd)) {
576  // Close the current region. Bundle the terminator if needed.
577  // This invalidates 'RegionEnd' and 'I'.
578  Scheduler.exitRegion();
579  continue;
580  }
581  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
582  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
583  << " " << MBB->getName() << "\n From: " << *I
584  << " To: ";
585  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
586  else dbgs() << "End";
587  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
589  errs() << MF->getName();
590  errs() << ":%bb. " << MBB->getNumber();
591  errs() << " " << MBB->getName() << " \n";
592  }
593 
594  // Schedule a region: possibly reorder instructions.
595  // This invalidates the original region iterators.
596  Scheduler.schedule();
597 
598  // Close the current region.
599  Scheduler.exitRegion();
600  }
601  Scheduler.finishBlock();
602  // FIXME: Ideally, no further passes should rely on kill flags. However,
603  // thumb2 size reduction is currently an exception, so the PostMIScheduler
604  // needs to do this.
605  if (FixKillFlags)
606  Scheduler.fixupKills(*MBB);
607  }
608  Scheduler.finalizeSchedule();
609 }
610 
611 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
612  // unimplemented
613 }
614 
615 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
617  dbgs() << "Queue " << Name << ": ";
618  for (const SUnit *SU : Queue)
619  dbgs() << SU->NodeNum << " ";
620  dbgs() << "\n";
621 }
622 #endif
623 
624 //===----------------------------------------------------------------------===//
625 // ScheduleDAGMI - Basic machine instruction scheduling. This is
626 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
627 // virtual registers.
628 //===----------------------------------------------------------------------===//
629 
630 // Provide a vtable anchor.
632 
633 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
634 /// NumPredsLeft reaches zero, release the successor node.
635 ///
636 /// FIXME: Adjust SuccSU height based on MinLatency.
637 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
638  SUnit *SuccSU = SuccEdge->getSUnit();
639 
640  if (SuccEdge->isWeak()) {
641  --SuccSU->WeakPredsLeft;
642  if (SuccEdge->isCluster())
643  NextClusterSucc = SuccSU;
644  return;
645  }
646 #ifndef NDEBUG
647  if (SuccSU->NumPredsLeft == 0) {
648  dbgs() << "*** Scheduling failed! ***\n";
649  dumpNode(*SuccSU);
650  dbgs() << " has been released too many times!\n";
651  llvm_unreachable(nullptr);
652  }
653 #endif
654  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
655  // CurrCycle may have advanced since then.
656  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
657  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
658 
659  --SuccSU->NumPredsLeft;
660  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
661  SchedImpl->releaseTopNode(SuccSU);
662 }
663 
664 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
666  for (SDep &Succ : SU->Succs)
667  releaseSucc(SU, &Succ);
668 }
669 
670 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
671 /// NumSuccsLeft reaches zero, release the predecessor node.
672 ///
673 /// FIXME: Adjust PredSU height based on MinLatency.
674 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
675  SUnit *PredSU = PredEdge->getSUnit();
676 
677  if (PredEdge->isWeak()) {
678  --PredSU->WeakSuccsLeft;
679  if (PredEdge->isCluster())
680  NextClusterPred = PredSU;
681  return;
682  }
683 #ifndef NDEBUG
684  if (PredSU->NumSuccsLeft == 0) {
685  dbgs() << "*** Scheduling failed! ***\n";
686  dumpNode(*PredSU);
687  dbgs() << " has been released too many times!\n";
688  llvm_unreachable(nullptr);
689  }
690 #endif
691  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
692  // CurrCycle may have advanced since then.
693  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
694  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
695 
696  --PredSU->NumSuccsLeft;
697  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
698  SchedImpl->releaseBottomNode(PredSU);
699 }
700 
701 /// releasePredecessors - Call releasePred on each of SU's predecessors.
703  for (SDep &Pred : SU->Preds)
704  releasePred(SU, &Pred);
705 }
706 
709  SchedImpl->enterMBB(bb);
710 }
711 
713  SchedImpl->leaveMBB();
715 }
716 
717 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
718 /// crossing a scheduling boundary. [begin, end) includes all instructions in
719 /// the region, including the boundary itself and single-instruction regions
720 /// that don't get scheduled.
724  unsigned regioninstrs)
725 {
726  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
727 
728  SchedImpl->initPolicy(begin, end, regioninstrs);
729 }
730 
731 /// This is normally called from the main scheduler loop but may also be invoked
732 /// by the scheduling strategy to perform additional code motion.
735  // Advance RegionBegin if the first instruction moves down.
736  if (&*RegionBegin == MI)
737  ++RegionBegin;
738 
739  // Update the instruction stream.
740  BB->splice(InsertPos, BB, MI);
741 
742  // Update LiveIntervals
743  if (LIS)
744  LIS->handleMove(*MI, /*UpdateFlags=*/true);
745 
746  // Recede RegionBegin if an instruction moves above the first.
747  if (RegionBegin == InsertPos)
748  RegionBegin = MI;
749 }
750 
752 #ifndef NDEBUG
755  return false;
756  }
758 #endif
759  return true;
760 }
761 
762 /// Per-region scheduling driver, called back from
763 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
764 /// does not consider liveness or register pressure. It is useful for PostRA
765 /// scheduling and potentially other custom schedulers.
767  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
768  LLVM_DEBUG(SchedImpl->dumpPolicy());
769 
770  // Build the DAG.
772 
773  postprocessDAG();
774 
775  SmallVector<SUnit*, 8> TopRoots, BotRoots;
776  findRootsAndBiasEdges(TopRoots, BotRoots);
777 
778  LLVM_DEBUG(dump());
779  if (PrintDAGs) dump();
780  if (ViewMISchedDAGs) viewGraph();
781 
782  // Initialize the strategy before modifying the DAG.
783  // This may initialize a DFSResult to be used for queue priority.
784  SchedImpl->initialize(this);
785 
786  // Initialize ready queues now that the DAG and priority data are finalized.
787  initQueues(TopRoots, BotRoots);
788 
789  bool IsTopNode = false;
790  while (true) {
791  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
792  SUnit *SU = SchedImpl->pickNode(IsTopNode);
793  if (!SU) break;
794 
795  assert(!SU->isScheduled && "Node already scheduled");
796  if (!checkSchedLimit())
797  break;
798 
799  MachineInstr *MI = SU->getInstr();
800  if (IsTopNode) {
801  assert(SU->isTopReady() && "node still has unscheduled dependencies");
802  if (&*CurrentTop == MI)
804  else
806  } else {
807  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
810  if (&*priorII == MI)
811  CurrentBottom = priorII;
812  else {
813  if (&*CurrentTop == MI)
814  CurrentTop = nextIfDebug(++CurrentTop, priorII);
816  CurrentBottom = MI;
817  }
818  }
819  // Notify the scheduling strategy before updating the DAG.
820  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
821  // runs, it can then use the accurate ReadyCycle time to determine whether
822  // newly released nodes can move to the readyQ.
823  SchedImpl->schedNode(SU, IsTopNode);
824 
825  updateQueues(SU, IsTopNode);
826  }
827  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
828 
830 
831  LLVM_DEBUG({
832  dbgs() << "*** Final schedule for "
833  << printMBBReference(*begin()->getParent()) << " ***\n";
834  dumpSchedule();
835  dbgs() << '\n';
836  });
837 }
838 
839 /// Apply each ScheduleDAGMutation step in order.
841  for (auto &m : Mutations)
842  m->apply(this);
843 }
844 
845 void ScheduleDAGMI::
847  SmallVectorImpl<SUnit*> &BotRoots) {
848  for (SUnit &SU : SUnits) {
849  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
850 
851  // Order predecessors so DFSResult follows the critical path.
852  SU.biasCriticalPath();
853 
854  // A SUnit is ready to top schedule if it has no predecessors.
855  if (!SU.NumPredsLeft)
856  TopRoots.push_back(&SU);
857  // A SUnit is ready to bottom schedule if it has no successors.
858  if (!SU.NumSuccsLeft)
859  BotRoots.push_back(&SU);
860  }
862 }
863 
864 /// Identify DAG roots and setup scheduler queues.
866  ArrayRef<SUnit*> BotRoots) {
867  NextClusterSucc = nullptr;
868  NextClusterPred = nullptr;
869 
870  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
871  //
872  // Nodes with unreleased weak edges can still be roots.
873  // Release top roots in forward order.
874  for (SUnit *SU : TopRoots)
875  SchedImpl->releaseTopNode(SU);
876 
877  // Release bottom roots in reverse order so the higher priority nodes appear
878  // first. This is more natural and slightly more efficient.
880  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
881  SchedImpl->releaseBottomNode(*I);
882  }
883 
886 
887  SchedImpl->registerRoots();
888 
889  // Advance past initial DebugValues.
892 }
893 
894 /// Update scheduler queues after scheduling an instruction.
895 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
896  // Release dependent instructions for scheduling.
897  if (IsTopNode)
898  releaseSuccessors(SU);
899  else
901 
902  SU->isScheduled = true;
903 }
904 
905 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
907  // If the first instruction was a DBG_VALUE then put it back.
908  if (FirstDbgValue) {
911  }
912 
913  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
914  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
915  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
916  MachineInstr *DbgValue = P.first;
917  MachineBasicBlock::iterator OrigPrevMI = P.second;
918  if (&*RegionBegin == DbgValue)
919  ++RegionBegin;
920  BB->splice(++OrigPrevMI, BB, DbgValue);
921  if (OrigPrevMI == std::prev(RegionEnd))
922  RegionEnd = DbgValue;
923  }
924  DbgValues.clear();
925  FirstDbgValue = nullptr;
926 }
927 
928 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
930  for (MachineInstr &MI : *this) {
931  if (SUnit *SU = getSUnit(&MI))
932  dumpNode(*SU);
933  else
934  dbgs() << "Missing SUnit\n";
935  }
936 }
937 #endif
938 
939 //===----------------------------------------------------------------------===//
940 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
941 // preservation.
942 //===----------------------------------------------------------------------===//
943 
945  delete DFSResult;
946 }
947 
949  const MachineInstr &MI = *SU.getInstr();
950  for (const MachineOperand &MO : MI.operands()) {
951  if (!MO.isReg())
952  continue;
953  if (!MO.readsReg())
954  continue;
955  if (TrackLaneMasks && !MO.isUse())
956  continue;
957 
958  Register Reg = MO.getReg();
960  continue;
961 
962  // Ignore re-defs.
963  if (TrackLaneMasks) {
964  bool FoundDef = false;
965  for (const MachineOperand &MO2 : MI.operands()) {
966  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
967  FoundDef = true;
968  break;
969  }
970  }
971  if (FoundDef)
972  continue;
973  }
974 
975  // Record this local VReg use.
977  for (; UI != VRegUses.end(); ++UI) {
978  if (UI->SU == &SU)
979  break;
980  }
981  if (UI == VRegUses.end())
983  }
984 }
985 
986 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
987 /// crossing a scheduling boundary. [begin, end) includes all instructions in
988 /// the region, including the boundary itself and single-instruction regions
989 /// that don't get scheduled.
993  unsigned regioninstrs)
994 {
995  // ScheduleDAGMI initializes SchedImpl's per-region policy.
996  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
997 
998  // For convenience remember the end of the liveness region.
999  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1000 
1002 
1003  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1004  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1005 
1007  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1008 }
1009 
1010 // Setup the register pressure trackers for the top scheduled and bottom
1011 // scheduled regions.
1013  VRegUses.clear();
1015  for (SUnit &SU : SUnits)
1016  collectVRegUses(SU);
1017 
1019  ShouldTrackLaneMasks, false);
1021  ShouldTrackLaneMasks, false);
1022 
1023  // Close the RPTracker to finalize live ins.
1025 
1027 
1028  // Initialize the live ins and live outs.
1031 
1032  // Close one end of the tracker so we can call
1033  // getMaxUpward/DownwardPressureDelta before advancing across any
1034  // instructions. This converts currently live regs into live ins/outs.
1037 
1039  if (!BotRPTracker.getLiveThru().empty()) {
1041  LLVM_DEBUG(dbgs() << "Live Thru: ";
1043  };
1044 
1045  // For each live out vreg reduce the pressure change associated with other
1046  // uses of the same vreg below the live-out reaching def.
1048 
1049  // Account for liveness generated by the region boundary.
1050  if (LiveRegionEnd != RegionEnd) {
1052  BotRPTracker.recede(&LiveUses);
1053  updatePressureDiffs(LiveUses);
1054  }
1055 
1056  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1058  dbgs() << "Bottom Pressure:\n";
1060 
1062  (RegionEnd->isDebugInstr() &&
1064  "Can't find the region bottom");
1065 
1066  // Cache the list of excess pressure sets in this region. This will also track
1067  // the max pressure in the scheduled code for these sets.
1068  RegionCriticalPSets.clear();
1069  const std::vector<unsigned> &RegionPressure =
1071  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1072  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1073  if (RegionPressure[i] > Limit) {
1074  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1075  << " Actual " << RegionPressure[i] << "\n");
1076  RegionCriticalPSets.push_back(PressureChange(i));
1077  }
1078  }
1079  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1080  for (const PressureChange &RCPS
1082  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1083  dbgs() << "\n");
1084 }
1085 
1088  const std::vector<unsigned> &NewMaxPressure) {
1089  const PressureDiff &PDiff = getPressureDiff(SU);
1090  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1091  for (const PressureChange &PC : PDiff) {
1092  if (!PC.isValid())
1093  break;
1094  unsigned ID = PC.getPSet();
1095  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1096  ++CritIdx;
1097  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1098  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1099  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1100  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1101  }
1102  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1103  if (NewMaxPressure[ID] >= Limit - 2) {
1104  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1105  << NewMaxPressure[ID]
1106  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1107  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1108  << " livethru)\n");
1109  }
1110  }
1111 }
1112 
1113 /// Update the PressureDiff array for liveness after scheduling this
1114 /// instruction.
1116  ArrayRef<RegisterMaskPair> LiveUses) {
1117  for (const RegisterMaskPair &P : LiveUses) {
1118  Register Reg = P.RegUnit;
1119  /// FIXME: Currently assuming single-use physregs.
1121  continue;
1122 
1123  if (ShouldTrackLaneMasks) {
1124  // If the register has just become live then other uses won't change
1125  // this fact anymore => decrement pressure.
1126  // If the register has just become dead then other uses make it come
1127  // back to life => increment pressure.
1128  bool Decrement = P.LaneMask.any();
1129 
1130  for (const VReg2SUnit &V2SU
1132  SUnit &SU = *V2SU.SU;
1133  if (SU.isScheduled || &SU == &ExitSU)
1134  continue;
1135 
1136  PressureDiff &PDiff = getPressureDiff(&SU);
1137  PDiff.addPressureChange(Reg, Decrement, &MRI);
1138  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1139  << printReg(Reg, TRI) << ':'
1140  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1141  dbgs() << " to "; PDiff.dump(*TRI););
1142  }
1143  } else {
1144  assert(P.LaneMask.any());
1145  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1146  // This may be called before CurrentBottom has been initialized. However,
1147  // BotRPTracker must have a valid position. We want the value live into the
1148  // instruction or live out of the block, so ask for the previous
1149  // instruction's live-out.
1150  const LiveInterval &LI = LIS->getInterval(Reg);
1151  VNInfo *VNI;
1154  if (I == BB->end())
1155  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1156  else {
1158  VNI = LRQ.valueIn();
1159  }
1160  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1161  assert(VNI && "No live value at use.");
1162  for (const VReg2SUnit &V2SU
1164  SUnit *SU = V2SU.SU;
1165  // If this use comes before the reaching def, it cannot be a last use,
1166  // so decrease its pressure change.
1167  if (!SU->isScheduled && SU != &ExitSU) {
1168  LiveQueryResult LRQ =
1169  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1170  if (LRQ.valueIn() == VNI) {
1171  PressureDiff &PDiff = getPressureDiff(SU);
1172  PDiff.addPressureChange(Reg, true, &MRI);
1173  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1174  << *SU->getInstr();
1175  dbgs() << " to "; PDiff.dump(*TRI););
1176  }
1177  }
1178  }
1179  }
1180  }
1181 }
1182 
1184 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1185  if (EntrySU.getInstr() != nullptr)
1187  for (const SUnit &SU : SUnits) {
1188  dumpNodeAll(SU);
1189  if (ShouldTrackPressure) {
1190  dbgs() << " Pressure Diff : ";
1191  getPressureDiff(&SU).dump(*TRI);
1192  }
1193  dbgs() << " Single Issue : ";
1194  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1195  SchedModel.mustEndGroup(SU.getInstr()))
1196  dbgs() << "true;";
1197  else
1198  dbgs() << "false;";
1199  dbgs() << '\n';
1200  }
1201  if (ExitSU.getInstr() != nullptr)
1203 #endif
1204 }
1205 
1206 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1207 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1208 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1209 ///
1210 /// This is a skeletal driver, with all the functionality pushed into helpers,
1211 /// so that it can be easily extended by experimental schedulers. Generally,
1212 /// implementing MachineSchedStrategy should be sufficient to implement a new
1213 /// scheduling algorithm. However, if a scheduler further subclasses
1214 /// ScheduleDAGMILive then it will want to override this virtual method in order
1215 /// to update any specialized state.
1217  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1218  LLVM_DEBUG(SchedImpl->dumpPolicy());
1220 
1221  postprocessDAG();
1222 
1223  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1224  findRootsAndBiasEdges(TopRoots, BotRoots);
1225 
1226  // Initialize the strategy before modifying the DAG.
1227  // This may initialize a DFSResult to be used for queue priority.
1228  SchedImpl->initialize(this);
1229 
1230  LLVM_DEBUG(dump());
1231  if (PrintDAGs) dump();
1232  if (ViewMISchedDAGs) viewGraph();
1233 
1234  // Initialize ready queues now that the DAG and priority data are finalized.
1235  initQueues(TopRoots, BotRoots);
1236 
1237  bool IsTopNode = false;
1238  while (true) {
1239  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1240  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1241  if (!SU) break;
1242 
1243  assert(!SU->isScheduled && "Node already scheduled");
1244  if (!checkSchedLimit())
1245  break;
1246 
1247  scheduleMI(SU, IsTopNode);
1248 
1249  if (DFSResult) {
1250  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1251  if (!ScheduledTrees.test(SubtreeID)) {
1252  ScheduledTrees.set(SubtreeID);
1253  DFSResult->scheduleTree(SubtreeID);
1254  SchedImpl->scheduleTree(SubtreeID);
1255  }
1256  }
1257 
1258  // Notify the scheduling strategy after updating the DAG.
1259  SchedImpl->schedNode(SU, IsTopNode);
1260 
1261  updateQueues(SU, IsTopNode);
1262  }
1263  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1264 
1265  placeDebugValues();
1266 
1267  LLVM_DEBUG({
1268  dbgs() << "*** Final schedule for "
1269  << printMBBReference(*begin()->getParent()) << " ***\n";
1270  dumpSchedule();
1271  dbgs() << '\n';
1272  });
1273 }
1274 
1275 /// Build the DAG and setup three register pressure trackers.
1277  if (!ShouldTrackPressure) {
1278  RPTracker.reset();
1279  RegionCriticalPSets.clear();
1281  return;
1282  }
1283 
1284  // Initialize the register pressure tracker used by buildSchedGraph.
1286  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1287 
1288  // Account for liveness generated by the region boundary.
1289  if (LiveRegionEnd != RegionEnd)
1290  RPTracker.recede();
1291 
1292  // Build the DAG, and compute current register pressure.
1294 
1295  // Initialize top/bottom trackers after computing region pressure.
1296  initRegPressure();
1297 }
1298 
1300  if (!DFSResult)
1301  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1302  DFSResult->clear();
1304  DFSResult->resize(SUnits.size());
1307 }
1308 
1309 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1310 /// only provides the critical path for single block loops. To handle loops that
1311 /// span blocks, we could use the vreg path latencies provided by
1312 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1313 /// available for use in the scheduler.
1314 ///
1315 /// The cyclic path estimation identifies a def-use pair that crosses the back
1316 /// edge and considers the depth and height of the nodes. For example, consider
1317 /// the following instruction sequence where each instruction has unit latency
1318 /// and defines an eponymous virtual register:
1319 ///
1320 /// a->b(a,c)->c(b)->d(c)->exit
1321 ///
1322 /// The cyclic critical path is two cycles: b->c->b
1323 /// The acyclic critical path is four cycles: a->b->c->d->exit
1324 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1325 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1326 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1327 /// LiveInDepth = depth(b) = len(a->b) = 1
1328 ///
1329 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1330 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1331 /// CyclicCriticalPath = min(2, 2) = 2
1332 ///
1333 /// This could be relevant to PostRA scheduling, but is currently implemented
1334 /// assuming LiveIntervals.
1336  // This only applies to single-block loops.
1337  if (!BB->isSuccessor(BB))
1338  return 0;
1339 
1340  unsigned MaxCyclicLatency = 0;
1341  // Visit each live out vreg def to find def/use pairs that cross iterations.
1343  Register Reg = P.RegUnit;
1345  continue;
1346  const LiveInterval &LI = LIS->getInterval(Reg);
1347  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1348  if (!DefVNI)
1349  continue;
1350 
1352  const SUnit *DefSU = getSUnit(DefMI);
1353  if (!DefSU)
1354  continue;
1355 
1356  unsigned LiveOutHeight = DefSU->getHeight();
1357  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1358  // Visit all local users of the vreg def.
1359  for (const VReg2SUnit &V2SU
1361  SUnit *SU = V2SU.SU;
1362  if (SU == &ExitSU)
1363  continue;
1364 
1365  // Only consider uses of the phi.
1367  if (!LRQ.valueIn()->isPHIDef())
1368  continue;
1369 
1370  // Assume that a path spanning two iterations is a cycle, which could
1371  // overestimate in strange cases. This allows cyclic latency to be
1372  // estimated as the minimum slack of the vreg's depth or height.
1373  unsigned CyclicLatency = 0;
1374  if (LiveOutDepth > SU->getDepth())
1375  CyclicLatency = LiveOutDepth - SU->getDepth();
1376 
1377  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1378  if (LiveInHeight > LiveOutHeight) {
1379  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1380  CyclicLatency = LiveInHeight - LiveOutHeight;
1381  } else
1382  CyclicLatency = 0;
1383 
1384  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1385  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1386  if (CyclicLatency > MaxCyclicLatency)
1387  MaxCyclicLatency = CyclicLatency;
1388  }
1389  }
1390  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1391  return MaxCyclicLatency;
1392 }
1393 
1394 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1395 /// the Top RP tracker in case the region beginning has changed.
1397  ArrayRef<SUnit*> BotRoots) {
1398  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1399  if (ShouldTrackPressure) {
1400  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1402  }
1403 }
1404 
1405 /// Move an instruction and update register pressure.
1406 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1407  // Move the instruction to its new location in the instruction stream.
1408  MachineInstr *MI = SU->getInstr();
1409 
1410  if (IsTopNode) {
1411  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1412  if (&*CurrentTop == MI)
1414  else {
1417  }
1418 
1419  if (ShouldTrackPressure) {
1420  // Update top scheduled pressure.
1421  RegisterOperands RegOpers;
1422  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1423  if (ShouldTrackLaneMasks) {
1424  // Adjust liveness and add missing dead+read-undef flags.
1425  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1426  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1427  } else {
1428  // Adjust for missing dead-def flags.
1429  RegOpers.detectDeadDefs(*MI, *LIS);
1430  }
1431 
1432  TopRPTracker.advance(RegOpers);
1433  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1434  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1436 
1438  }
1439  } else {
1440  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1441  MachineBasicBlock::iterator priorII =
1443  if (&*priorII == MI)
1444  CurrentBottom = priorII;
1445  else {
1446  if (&*CurrentTop == MI) {
1447  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1449  }
1451  CurrentBottom = MI;
1453  }
1454  if (ShouldTrackPressure) {
1455  RegisterOperands RegOpers;
1456  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1457  if (ShouldTrackLaneMasks) {
1458  // Adjust liveness and add missing dead+read-undef flags.
1459  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1460  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1461  } else {
1462  // Adjust for missing dead-def flags.
1463  RegOpers.detectDeadDefs(*MI, *LIS);
1464  }
1465 
1469  BotRPTracker.recede(RegOpers, &LiveUses);
1470  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1471  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1473 
1475  updatePressureDiffs(LiveUses);
1476  }
1477  }
1478 }
1479 
1480 //===----------------------------------------------------------------------===//
1481 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1482 //===----------------------------------------------------------------------===//
1483 
1484 namespace {
1485 
1486 /// Post-process the DAG to create cluster edges between neighboring
1487 /// loads or between neighboring stores.
1488 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1489  struct MemOpInfo {
1490  SUnit *SU;
1491  SmallVector<const MachineOperand *, 4> BaseOps;
1492  int64_t Offset;
1493  unsigned Width;
1494 
1495  MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1496  int64_t Offset, unsigned Width)
1497  : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1498  Width(Width) {}
1499 
1500  static bool Compare(const MachineOperand *const &A,
1501  const MachineOperand *const &B) {
1502  if (A->getType() != B->getType())
1503  return A->getType() < B->getType();
1504  if (A->isReg())
1505  return A->getReg() < B->getReg();
1506  if (A->isFI()) {
1507  const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1508  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1509  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1511  return StackGrowsDown ? A->getIndex() > B->getIndex()
1512  : A->getIndex() < B->getIndex();
1513  }
1514 
1515  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1516  "index bases.");
1517  }
1518 
1519  bool operator<(const MemOpInfo &RHS) const {
1520  // FIXME: Don't compare everything twice. Maybe use C++20 three way
1521  // comparison instead when it's available.
1522  if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1523  RHS.BaseOps.begin(), RHS.BaseOps.end(),
1524  Compare))
1525  return true;
1526  if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1527  BaseOps.begin(), BaseOps.end(), Compare))
1528  return false;
1529  if (Offset != RHS.Offset)
1530  return Offset < RHS.Offset;
1531  return SU->NodeNum < RHS.SU->NodeNum;
1532  }
1533  };
1534 
1535  const TargetInstrInfo *TII;
1536  const TargetRegisterInfo *TRI;
1537  bool IsLoad;
1538 
1539 public:
1540  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1541  const TargetRegisterInfo *tri, bool IsLoad)
1542  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1543 
1544  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1545 
1546 protected:
1547  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1548  ScheduleDAGInstrs *DAG);
1549  void collectMemOpRecords(std::vector<SUnit> &SUnits,
1550  SmallVectorImpl<MemOpInfo> &MemOpRecords);
1551  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1553 };
1554 
1555 class StoreClusterMutation : public BaseMemOpClusterMutation {
1556 public:
1557  StoreClusterMutation(const TargetInstrInfo *tii,
1558  const TargetRegisterInfo *tri)
1559  : BaseMemOpClusterMutation(tii, tri, false) {}
1560 };
1561 
1562 class LoadClusterMutation : public BaseMemOpClusterMutation {
1563 public:
1564  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1565  : BaseMemOpClusterMutation(tii, tri, true) {}
1566 };
1567 
1568 } // end anonymous namespace
1569 
1570 namespace llvm {
1571 
1572 std::unique_ptr<ScheduleDAGMutation>
1574  const TargetRegisterInfo *TRI) {
1575  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1576  : nullptr;
1577 }
1578 
1579 std::unique_ptr<ScheduleDAGMutation>
1581  const TargetRegisterInfo *TRI) {
1582  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1583  : nullptr;
1584 }
1585 
1586 } // end namespace llvm
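// A brief usage sketch (hypothetical target code, not part of this file): a
// target's createMachineScheduler hook typically attaches these mutations to
// the DAG it returns, e.g.
//
//   ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//
// When -misched-cluster=false both factories return nullptr, and
// ScheduleDAGMI::addMutation drops null mutations, so the flag disables
// clustering globally.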
1587 
1588 // Sort all the loads/stores first. Then, for each load/store, check the
1589 // following loads/stores one by one until reaching the first non-dependent
1590 // one, and call the target hook to see if they can be clustered.
1591 // If FastCluster is enabled, we assume that all the loads/stores have been
1592 // preprocessed and have no dependencies on each other.
1593 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1594  ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1595  ScheduleDAGInstrs *DAG) {
1596  // Keep track of the current cluster length and bytes for each SUnit.
1598 
1599  // At this point, the `MemOpRecords` array must hold at least two mem ops.
1600  // Try to cluster the mem ops collected within the `MemOpRecords` array.
1601  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1602  // The decision to cluster mem ops is based on target-dependent logic.
1603  auto MemOpa = MemOpRecords[Idx];
1604 
1605  // Look for the next load/store with which to form a cluster.
1606  unsigned NextIdx = Idx + 1;
1607  for (; NextIdx < End; ++NextIdx)
1608  // Skip if MemOpb has been clustered already or has dependency with
1609  // MemOpa.
1610  if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1611  (FastCluster ||
1612  (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1613  !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1614  break;
1615  if (NextIdx == End)
1616  continue;
1617 
1618  auto MemOpb = MemOpRecords[NextIdx];
1619  unsigned ClusterLength = 2;
1620  unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1621  if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1622  ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1623  CurrentClusterBytes =
1624  SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1625  }
1626 
1627  if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1628  CurrentClusterBytes))
1629  continue;
1630 
1631  SUnit *SUa = MemOpa.SU;
1632  SUnit *SUb = MemOpb.SU;
1633  if (SUa->NodeNum > SUb->NodeNum)
1634  std::swap(SUa, SUb);
1635 
1636  // FIXME: Is this check really required?
1637  if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1638  continue;
1639 
1640  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1641  << SUb->NodeNum << ")\n");
1642  ++NumClustered;
1643 
1644  if (IsLoad) {
1645  // Copy successor edges from SUa to SUb. Interleaving computation
1646  // dependent on SUa can prevent load combining due to register reuse.
1647  // Predecessor edges do not need to be copied from SUb to SUa since
1648  // nearby loads should have effectively the same inputs.
1649  for (const SDep &Succ : SUa->Succs) {
1650  if (Succ.getSUnit() == SUb)
1651  continue;
1652  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1653  << ")\n");
1654  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1655  }
1656  } else {
1657  // Copy predecessor edges from SUb to SUa to avoid the SUnits that SUb
1658  // depends on being scheduled in between SUb and SUa. Successor edges
1659  // do not need to be copied from SUa to SUb since no one will depend
1660  // on stores.
1661  // Note that we don't need to worry about memory dependencies here, as
1662  // we won't try to cluster ops that have any memory dependency.
1663  for (const SDep &Pred : SUb->Preds) {
1664  if (Pred.getSUnit() == SUa)
1665  continue;
1666  LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1667  << ")\n");
1668  DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1669  }
1670  }
1671 
1672  SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1673  CurrentClusterBytes};
1674 
1675  LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1676  << ", Curr cluster bytes: " << CurrentClusterBytes
1677  << "\n");
1678  }
1679 }
1680 
1681 void BaseMemOpClusterMutation::collectMemOpRecords(
1682  std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1683  for (auto &SU : SUnits) {
1684  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1685  (!IsLoad && !SU.getInstr()->mayStore()))
1686  continue;
1687 
1688  const MachineInstr &MI = *SU.getInstr();
1689  SmallVector<const MachineOperand *, 4> BaseOps;
1690  int64_t Offset;
1691  bool OffsetIsScalable;
1692  unsigned Width;
1693  if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1694  OffsetIsScalable, Width, TRI)) {
1695  MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1696 
1697  LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1698  << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1699  << ", Width: " << Width << "\n");
1700  }
1701 #ifndef NDEBUG
1702  for (auto *Op : BaseOps)
1703  assert(Op);
1704 #endif
1705  }
1706 }
1707 
1708 bool BaseMemOpClusterMutation::groupMemOps(
1711  bool FastCluster =
1712  ForceFastCluster ||
1713  MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1714 
1715  for (const auto &MemOp : MemOps) {
1716  unsigned ChainPredID = DAG->SUnits.size();
1717  if (FastCluster) {
1718  for (const SDep &Pred : MemOp.SU->Preds) {
1719  // We only want to cluster the mem ops that have the same ctrl (non-data)
1720  // pred, so that they have no ctrl dependency on each other. But for
1721  // store instrs, we can still cluster them if the pred is a load instr.
1722  if ((Pred.isCtrl() &&
1723  (IsLoad ||
1724  (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1725  !Pred.isArtificial()) {
1726  ChainPredID = Pred.getSUnit()->NodeNum;
1727  break;
1728  }
1729  }
1730  } else
1731  ChainPredID = 0;
1732 
1733  Groups[ChainPredID].push_back(MemOp);
1734  }
1735  return FastCluster;
1736 }
1737 
1738 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1740  // Collect all the clusterable loads/stores
1741  SmallVector<MemOpInfo, 32> MemOpRecords;
1742  collectMemOpRecords(DAG->SUnits, MemOpRecords);
1743 
1744  if (MemOpRecords.size() < 2)
1745  return;
1746 
1747  // If the DAG is too complex, use a heuristic to put loads/stores without
1748  // dependencies into the same group, to avoid a compile-time blow-up.
1749  // Note that some fusion pairs could be lost because of this.
1751  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1752 
1753  for (auto &Group : Groups) {
1754  // Sort the loads/stores so that clustering can stop as early as
1755  // possible.
1756  llvm::sort(Group.second);
1757 
1758  // Try to cluster all the neighboring loads/stores.
1759  clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1760  }
1761 }
1762 
1763 //===----------------------------------------------------------------------===//
1764 // CopyConstrain - DAG post-processing to encourage copy elimination.
1765 //===----------------------------------------------------------------------===//
1766 
1767 namespace {
1768 
1769 /// Post-process the DAG to create weak edges from all uses of a copy to
1770 /// the one use that defines the copy's source vreg, most likely an induction
1771 /// variable increment.
1772 class CopyConstrain : public ScheduleDAGMutation {
1773  // Transient state.
1774  SlotIndex RegionBeginIdx;
1775 
1776  // RegionEndIdx is the slot index of the last non-debug instruction in the
1777  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1778  SlotIndex RegionEndIdx;
1779 
1780 public:
1781  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1782 
1783  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1784 
1785 protected:
1786  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1787 };
1788 
1789 } // end anonymous namespace
1790 
1791 namespace llvm {
1792 
1793 std::unique_ptr<ScheduleDAGMutation>
1794 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1795  const TargetRegisterInfo *TRI) {
1796  return std::make_unique<CopyConstrain>(TII, TRI);
1797 }
1798 
1799 } // end namespace llvm
1800 
1801 /// constrainLocalCopy handles two possibilities:
1802 /// 1) Local src:
1803 /// I0: = dst
1804 /// I1: src = ...
1805 /// I2: = dst
1806 /// I3: dst = src (copy)
1807 /// (create pred->succ edges I0->I1, I2->I1)
1808 ///
1809 /// 2) Local copy:
1810 /// I0: dst = src (copy)
1811 /// I1: = dst
1812 /// I2: src = ...
1813 /// I3: = dst
1814 /// (create pred->succ edges I1->I2, I3->I2)
1815 ///
1816 /// Although the MachineScheduler is currently constrained to single blocks,
1817 /// this algorithm should handle extended blocks. An EBB is a set of
1818 /// contiguously numbered blocks such that the previous block in the EBB is
1819 /// always the single predecessor.
1820 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1821  LiveIntervals *LIS = DAG->getLIS();
1822  MachineInstr *Copy = CopySU->getInstr();
1823 
1824  // Check for pure vreg copies.
1825  const MachineOperand &SrcOp = Copy->getOperand(1);
1826  Register SrcReg = SrcOp.getReg();
1827  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1828  return;
1829 
1830  const MachineOperand &DstOp = Copy->getOperand(0);
1831  Register DstReg = DstOp.getReg();
1832  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1833  return;
1834 
1835  // Check if either the dest or source is local. If it's live across a back
1836  // edge, it's not local. Note that if both vregs are live across the back
1837  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1838  // If both the copy's source and dest are local live intervals, then we
1839  // should treat the dest as the global for the purpose of adding
1840  // constraints. This adds edges from source's other uses to the copy.
1841  unsigned LocalReg = SrcReg;
1842  unsigned GlobalReg = DstReg;
1843  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1844  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1845  LocalReg = DstReg;
1846  GlobalReg = SrcReg;
1847  LocalLI = &LIS->getInterval(LocalReg);
1848  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1849  return;
1850  }
1851  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1852 
1853  // Find the global segment after the start of the local LI.
1854  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1855  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1856  // local live range. We could create edges from other global uses to the local
1857  // start, but the coalescer should have already eliminated these cases, so
1858  // don't bother dealing with it.
1859  if (GlobalSegment == GlobalLI->end())
1860  return;
1861 
1862  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1863  // returned the next global segment. But if GlobalSegment overlaps with
1864  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1865  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1866  if (GlobalSegment->contains(LocalLI->beginIndex()))
1867  ++GlobalSegment;
1868 
1869  if (GlobalSegment == GlobalLI->end())
1870  return;
1871 
1872  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1873  if (GlobalSegment != GlobalLI->begin()) {
1874  // Two address defs have no hole.
1875  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1876  GlobalSegment->start)) {
1877  return;
1878  }
1879  // If the prior global segment may be defined by the same two-address
1880  // instruction that also defines LocalLI, then we can't make a hole here.
1881  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1882  LocalLI->beginIndex())) {
1883  return;
1884  }
1885  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1886  // it would be a disconnected component in the live range.
1887  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1888  "Disconnected LRG within the scheduling region.");
1889  }
1890  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1891  if (!GlobalDef)
1892  return;
1893 
1894  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1895  if (!GlobalSU)
1896  return;
1897 
1898  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1899  // constraining the uses of the last local def to precede GlobalDef.
1900  SmallVector<SUnit*,8> LocalUses;
1901  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1902  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1903  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1904  for (const SDep &Succ : LastLocalSU->Succs) {
1905  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1906  continue;
1907  if (Succ.getSUnit() == GlobalSU)
1908  continue;
1909  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1910  return;
1911  LocalUses.push_back(Succ.getSUnit());
1912  }
1913  // Open the top of the GlobalLI hole by constraining any earlier global uses
1914  // to precede the start of LocalLI.
1915  SmallVector<SUnit*,8> GlobalUses;
1916  MachineInstr *FirstLocalDef =
1917  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1918  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1919  for (const SDep &Pred : GlobalSU->Preds) {
1920  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1921  continue;
1922  if (Pred.getSUnit() == FirstLocalSU)
1923  continue;
1924  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1925  return;
1926  GlobalUses.push_back(Pred.getSUnit());
1927  }
1928  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1929  // Add the weak edges.
1930  for (SUnit *LU : LocalUses) {
1931  LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
1932  << GlobalSU->NodeNum << ")\n");
1933  DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
1934  }
1935  for (SUnit *GU : GlobalUses) {
1936  LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
1937  << FirstLocalSU->NodeNum << ")\n");
1938  DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
1939  }
1940 }
1941 
1942 /// Callback from DAG postProcessing to create weak edges to encourage
1943 /// copy elimination.
1944 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1945  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1946  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1947 
1948  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1949  if (FirstPos == DAG->end())
1950  return;
1951  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1952  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1953  *priorNonDebug(DAG->end(), DAG->begin()));
1954 
1955  for (SUnit &SU : DAG->SUnits) {
1956  if (!SU.getInstr()->isCopy())
1957  continue;
1958 
1959  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1960  }
1961 }
1962 
1963 //===----------------------------------------------------------------------===//
1964 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1965 // and possibly other custom schedulers.
1966 //===----------------------------------------------------------------------===//
1967 
1968 static const unsigned InvalidCycle = ~0U;
1969 
1971 
1972 /// Given a Count of resource usage and a Latency value, return true if a
1973 /// SchedBoundary becomes resource limited.
1974 /// If we are checking after scheduling a node, we should return true when
1975 /// we just reach the resource limit.
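/// For example (illustrative numbers only): with LFactor == 2, Count == 10 and
/// Latency == 4, ResCntFactor = 10 - 4 * 2 = 2; after scheduling a node this
/// already counts as resource limited (2 >= 2), while before scheduling a node
/// it does not (2 > 2 is false).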
1976 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1977  unsigned Latency, bool AfterSchedNode) {
1978  int ResCntFactor = (int)(Count - (Latency * LFactor));
1979  if (AfterSchedNode)
1980  return ResCntFactor >= (int)LFactor;
1981  else
1982  return ResCntFactor > (int)LFactor;
1983 }
1984 
1985 void SchedBoundary::reset() {
1986  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1987  // Destroying and reconstructing it is very expensive though. So keep
1988  // invalid, placeholder HazardRecs.
1989  if (HazardRec && HazardRec->isEnabled()) {
1990  delete HazardRec;
1991  HazardRec = nullptr;
1992  }
1993  Available.clear();
1994  Pending.clear();
1995  CheckPending = false;
1996  CurrCycle = 0;
1997  CurrMOps = 0;
1998  MinReadyCycle = std::numeric_limits<unsigned>::max();
1999  ExpectedLatency = 0;
2000  DependentLatency = 0;
2001  RetiredMOps = 0;
2002  MaxExecutedResCount = 0;
2003  ZoneCritResIdx = 0;
2004  IsResourceLimited = false;
2005  ReservedCycles.clear();
2006  ReservedCyclesIndex.clear();
2007  ResourceGroupSubUnitMasks.clear();
2008 #ifndef NDEBUG
2009  // Track the maximum number of stall cycles that could arise either from the
2010  // latency of a DAG edge or the number of cycles that a processor resource is
2011  // reserved (SchedBoundary::ReservedCycles).
2012  MaxObservedStall = 0;
2013 #endif
2014  // Reserve a zero-count for invalid CritResIdx.
2015  ExecutedResCounts.resize(1);
2016  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2017 }
2018 
2019 void SchedRemainder::
2020 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2021  reset();
2022  if (!SchedModel->hasInstrSchedModel())
2023  return;
2024  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2025  for (SUnit &SU : DAG->SUnits) {
2026  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2027  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2028  * SchedModel->getMicroOpFactor();
2029  for (TargetSchedModel::ProcResIter
2030  PI = SchedModel->getWriteProcResBegin(SC),
2031  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2032  unsigned PIdx = PI->ProcResourceIdx;
2033  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2034  RemainingCounts[PIdx] += (Factor * PI->Cycles);
2035  }
2036  }
2037 }
2038 
2039 void SchedBoundary::
2040 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2041  reset();
2042  DAG = dag;
2043  SchedModel = smodel;
2044  Rem = rem;
2045  if (SchedModel->hasInstrSchedModel()) {
2046  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2047  ReservedCyclesIndex.resize(ResourceCount);
2048  ExecutedResCounts.resize(ResourceCount);
2049  ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2050  unsigned NumUnits = 0;
2051 
2052  for (unsigned i = 0; i < ResourceCount; ++i) {
2053  ReservedCyclesIndex[i] = NumUnits;
2054  NumUnits += SchedModel->getProcResource(i)->NumUnits;
2055  if (isUnbufferedGroup(i)) {
2056  auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2057  for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2058  U != UE; ++U)
2059  ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2060  }
2061  }
2062 
2063  ReservedCycles.resize(NumUnits, InvalidCycle);
2064  }
2065 }
2066 
2067 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2068 /// these "soft stalls" differently than the hard stall cycles based on CPU
2069 /// resources and computed by checkHazard(). A fully in-order model
2070 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2071 /// available for scheduling until they are ready. However, a weaker in-order
2072 /// model may use this for heuristics. For example, if a processor has in-order
2073 /// behavior when reading certain resources, this may come into play.
2074 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2075  if (!SU->isUnbuffered)
2076  return 0;
2077 
2078  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2079  if (ReadyCycle > CurrCycle)
2080  return ReadyCycle - CurrCycle;
2081  return 0;
2082 }
2083 
2084 /// Compute the next cycle at which the given processor resource unit
2085 /// can be scheduled.
2086 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2087  unsigned Cycles) {
2088  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2089  // If this resource has never been used, always return cycle zero.
2090  if (NextUnreserved == InvalidCycle)
2091  return 0;
2092  // For bottom-up scheduling add the cycles needed for the current operation.
2093  if (!isTop())
2094  NextUnreserved += Cycles;
2095  return NextUnreserved;
2096 }
2097 
2098 /// Compute the next cycle at which the given processor resource can be
2099 /// scheduled. Returns the next cycle and the index of the processor resource
2100 /// instance in the reserved cycles vector.
2101 std::pair<unsigned, unsigned>
2102 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2103  unsigned Cycles) {
2104 
2105  unsigned MinNextUnreserved = InvalidCycle;
2106  unsigned InstanceIdx = 0;
2107  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2108  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2109  assert(NumberOfInstances > 0 &&
2110  "Cannot have zero instances of a ProcResource");
2111 
2112  if (isUnbufferedGroup(PIdx)) {
2113  // If any subunits are used by the instruction, report that the resource
2114  // group is available at 0, effectively removing the group record from
2115  // hazarding and basing the hazarding decisions on the subunit records.
2116  // Otherwise, choose the first available instance from among the subunits.
2117  // Specifications which assign cycles to both the subunits and the group or
2118  // which use an unbuffered group with buffered subunits will appear to
2119  // schedule strangely. In the first case, the additional cycles for the
2120  // group will be ignored. In the second, the group will be ignored
2121  // entirely.
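  // For instance (hypothetical resources): if group G consists of subunits U0
  // and U1 and the instruction's sched class already writes U1, G is reported
  // available at cycle 0 and only U1's own reservation constrains the pick; if
  // the instruction writes none of the subunits, the earliest-available
  // subunit is chosen below.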
2122  for (const MCWriteProcResEntry &PE :
2123  make_range(SchedModel->getWriteProcResBegin(SC),
2124  SchedModel->getWriteProcResEnd(SC)))
2125  if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2126  return std::make_pair(0u, StartIndex);
2127 
2128  auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2129  for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2130  unsigned NextUnreserved, NextInstanceIdx;
2131  std::tie(NextUnreserved, NextInstanceIdx) =
2132  getNextResourceCycle(SC, SubUnits[I], Cycles);
2133  if (MinNextUnreserved > NextUnreserved) {
2134  InstanceIdx = NextInstanceIdx;
2135  MinNextUnreserved = NextUnreserved;
2136  }
2137  }
2138  return std::make_pair(MinNextUnreserved, InstanceIdx);
2139  }
2140 
2141  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2142  ++I) {
2143  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2144  if (MinNextUnreserved > NextUnreserved) {
2145  InstanceIdx = I;
2146  MinNextUnreserved = NextUnreserved;
2147  }
2148  }
2149  return std::make_pair(MinNextUnreserved, InstanceIdx);
2150 }
2151 
2152 /// Does this SU have a hazard within the current instruction group.
2153 ///
2154 /// The scheduler supports two modes of hazard recognition. The first is the
2155 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2156 /// supports highly complicated in-order reservation tables
2157 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2158 ///
2159 /// The second is a streamlined mechanism that checks for hazards based on
2160 /// simple counters that the scheduler itself maintains. It explicitly checks
2161 /// for instruction dispatch limitations, including the number of micro-ops that
2162 /// can dispatch per cycle.
2163 ///
2164 /// TODO: Also check whether the SU must start a new group.
2165 bool SchedBoundary::checkHazard(SUnit *SU) {
2166  if (HazardRec->isEnabled()
2167  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2168  return true;
2169  }
2170 
2171  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2172  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2173  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2174  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2175  return true;
2176  }
2177 
2178  if (CurrMOps > 0 &&
2179  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2180  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2181  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2182  << (isTop() ? "begin" : "end") << " group\n");
2183  return true;
2184  }
2185 
2186  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2187  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2188  for (const MCWriteProcResEntry &PE :
2189  make_range(SchedModel->getWriteProcResBegin(SC),
2190  SchedModel->getWriteProcResEnd(SC))) {
2191  unsigned ResIdx = PE.ProcResourceIdx;
2192  unsigned Cycles = PE.Cycles;
2193  unsigned NRCycle, InstanceIdx;
2194  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2195  if (NRCycle > CurrCycle) {
2196 #ifndef NDEBUG
2197  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2198 #endif
2199  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2200  << SchedModel->getResourceName(ResIdx)
2201  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2202  << "=" << NRCycle << "c\n");
2203  return true;
2204  }
2205  }
2206  }
2207  return false;
2208 }
2209 
2210 // Find the unscheduled node in ReadySUs with the highest latency.
2211 unsigned SchedBoundary::
2212 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2213  SUnit *LateSU = nullptr;
2214  unsigned RemLatency = 0;
2215  for (SUnit *SU : ReadySUs) {
2216  unsigned L = getUnscheduledLatency(SU);
2217  if (L > RemLatency) {
2218  RemLatency = L;
2219  LateSU = SU;
2220  }
2221  }
2222  if (LateSU) {
2223  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2224  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2225  }
2226  return RemLatency;
2227 }
2228 
2229 // Count resources in this zone and the remaining unscheduled
2230 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2231 // resource index, or zero if the zone is issue limited.
2232 unsigned SchedBoundary::
2233 getOtherResourceCount(unsigned &OtherCritIdx) {
2234  OtherCritIdx = 0;
2235  if (!SchedModel->hasInstrSchedModel())
2236  return 0;
2237 
2238  unsigned OtherCritCount = Rem->RemIssueCount
2239  + (RetiredMOps * SchedModel->getMicroOpFactor());
2240  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2241  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2242  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2243  PIdx != PEnd; ++PIdx) {
2244  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2245  if (OtherCount > OtherCritCount) {
2246  OtherCritCount = OtherCount;
2247  OtherCritIdx = PIdx;
2248  }
2249  }
2250  if (OtherCritIdx) {
2251  LLVM_DEBUG(
2252  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2253  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2254  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2255  }
2256  return OtherCritCount;
2257 }
2258 
2259 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2260  unsigned Idx) {
2261  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2262 
2263 #ifndef NDEBUG
2264  // ReadyCycle was bumped up to the CurrCycle when this node was
2265  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2266  // scheduling, so may now be greater than ReadyCycle.
2267  if (ReadyCycle > CurrCycle)
2268  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2269 #endif
2270 
2271  if (ReadyCycle < MinReadyCycle)
2272  MinReadyCycle = ReadyCycle;
2273 
2274  // Check for interlocks first. For the purpose of other heuristics, an
2275  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2276  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2277  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2278  checkHazard(SU) || (Available.size() >= ReadyListLimit);
2279 
2280  if (!HazardDetected) {
2281  Available.push(SU);
2282 
2283  if (InPQueue)
2284  Pending.remove(Pending.begin() + Idx);
2285  return;
2286  }
2287 
2288  if (!InPQueue)
2289  Pending.push(SU);
2290 }
2291 
2292 /// Move the boundary of scheduled code by one cycle.
2293 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2294  if (SchedModel->getMicroOpBufferSize() == 0) {
2295  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2296  "MinReadyCycle uninitialized");
2297  if (MinReadyCycle > NextCycle)
2298  NextCycle = MinReadyCycle;
2299  }
2300  // Update the current micro-ops, which will issue in the next cycle.
2301  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2302  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
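  // Illustrative example (hypothetical numbers): with an issue width of 4 and
  // NextCycle two cycles past CurrCycle, DecMOps = 8, so any CurrMOps value up
  // to 8 is drained to zero by the cycle advance.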
2303 
2304  // Decrement DependentLatency based on the next cycle.
2305  if ((NextCycle - CurrCycle) > DependentLatency)
2306  DependentLatency = 0;
2307  else
2308  DependentLatency -= (NextCycle - CurrCycle);
2309 
2310  if (!HazardRec->isEnabled()) {
2311  // Bypass HazardRec virtual calls.
2312  CurrCycle = NextCycle;
2313  } else {
2314  // Bypass getHazardType calls in case of long latency.
2315  for (; CurrCycle != NextCycle; ++CurrCycle) {
2316  if (isTop())
2317  HazardRec->AdvanceCycle();
2318  else
2319  HazardRec->RecedeCycle();
2320  }
2321  }
2322  CheckPending = true;
2323  IsResourceLimited =
2324  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2325  getScheduledLatency(), true);
2326 
2327  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2328  << '\n');
2329 }
2330 
2331 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2332  ExecutedResCounts[PIdx] += Count;
2333  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2334  MaxExecutedResCount = ExecutedResCounts[PIdx];
2335 }
2336 
2337 /// Add the given processor resource to this scheduled zone.
2338 ///
2339 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2340 /// during which this resource is consumed.
2341 ///
2342 /// \return the next cycle at which the instruction may execute without
2343 /// oversubscribing resources.
2344 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2345  unsigned Cycles, unsigned NextCycle) {
2346  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2347  unsigned Count = Factor * Cycles;
2348  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2349  << Cycles << "x" << Factor << "u\n");
2350 
2351  // Update Executed resources counts.
2352  incExecutedResources(PIdx, Count);
2353  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2354  Rem->RemainingCounts[PIdx] -= Count;
2355 
2356  // Check if this resource exceeds the current critical resource. If so, it
2357  // becomes the critical resource.
2358  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2359  ZoneCritResIdx = PIdx;
2360  LLVM_DEBUG(dbgs() << " *** Critical resource "
2361  << SchedModel->getResourceName(PIdx) << ": "
2362  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2363  << "c\n");
2364  }
2365  // For reserved resources, record the highest cycle using the resource.
2366  unsigned NextAvailable, InstanceIdx;
2367  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2368  if (NextAvailable > CurrCycle) {
2369  LLVM_DEBUG(dbgs() << " Resource conflict: "
2370  << SchedModel->getResourceName(PIdx)
2371  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2372  << " reserved until @" << NextAvailable << "\n");
2373  }
2374  return NextAvailable;
2375 }
2376 
2377 /// Move the boundary of scheduled code by one SUnit.
2378 void SchedBoundary::bumpNode(SUnit *SU) {
2379  // Update the reservation table.
2380  if (HazardRec->isEnabled()) {
2381  if (!isTop() && SU->isCall) {
2382  // Calls are scheduled with their preceding instructions. For bottom-up
2383  // scheduling, clear the pipeline state before emitting.
2384  HazardRec->Reset();
2385  }
2386  HazardRec->EmitInstruction(SU);
2387  // Scheduling an instruction may have made pending instructions available.
2388  CheckPending = true;
2389  }
2390  // checkHazard should prevent scheduling multiple instructions per cycle that
2391  // exceed the issue width.
2392  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2393  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2394  assert(
2395  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2396  "Cannot schedule this instruction's MicroOps in the current cycle.");
2397 
2398  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2399  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2400 
2401  unsigned NextCycle = CurrCycle;
2402  switch (SchedModel->getMicroOpBufferSize()) {
2403  case 0:
2404  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2405  break;
2406  case 1:
2407  if (ReadyCycle > NextCycle) {
2408  NextCycle = ReadyCycle;
2409  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2410  }
2411  break;
2412  default:
2413  // We don't currently model the OOO reorder buffer, so consider all
2414  // scheduled MOps to be "retired". We do loosely model in-order resource
2415  // latency. If this instruction uses an in-order resource, account for any
2416  // likely stall cycles.
2417  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2418  NextCycle = ReadyCycle;
2419  break;
2420  }
2421  RetiredMOps += IncMOps;
2422 
2423  // Update resource counts and critical resource.
2424  if (SchedModel->hasInstrSchedModel()) {
2425  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2426  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2427  Rem->RemIssueCount -= DecRemIssue;
2428  if (ZoneCritResIdx) {
2429  // Scale scheduled micro-ops for comparing with the critical resource.
2430  unsigned ScaledMOps =
2431  RetiredMOps * SchedModel->getMicroOpFactor();
2432 
2433  // If scaled micro-ops are now more than the previous critical resource by
2434  // a full cycle, then micro-ops issue becomes critical.
2435  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2436  >= (int)SchedModel->getLatencyFactor()) {
2437  ZoneCritResIdx = 0;
2438  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2439  << ScaledMOps / SchedModel->getLatencyFactor()
2440  << "c\n");
2441  }
2442  }
2443  for (TargetSchedModel::ProcResIter
2444  PI = SchedModel->getWriteProcResBegin(SC),
2445  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2446  unsigned RCycle =
2447  countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2448  if (RCycle > NextCycle)
2449  NextCycle = RCycle;
2450  }
2451  if (SU->hasReservedResource) {
2452  // For reserved resources, record the highest cycle using the resource.
2453  // For top-down scheduling, this is the cycle in which we schedule this
2454  // instruction plus the number of cycles the operation reserves the
2455  // resource. For bottom-up it is simply the instruction's cycle.
2456  for (TargetSchedModel::ProcResIter
2457  PI = SchedModel->getWriteProcResBegin(SC),
2458  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2459  unsigned PIdx = PI->ProcResourceIdx;
2460  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2461  unsigned ReservedUntil, InstanceIdx;
2462  std::tie(ReservedUntil, InstanceIdx) =
2463  getNextResourceCycle(SC, PIdx, 0);
2464  if (isTop()) {
2465  ReservedCycles[InstanceIdx] =
2466  std::max(ReservedUntil, NextCycle + PI->Cycles);
2467  } else
2468  ReservedCycles[InstanceIdx] = NextCycle;
2469  }
2470  }
2471  }
2472  }
2473  // Update ExpectedLatency and DependentLatency.
2474  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2475  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2476  if (SU->getDepth() > TopLatency) {
2477  TopLatency = SU->getDepth();
2478  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2479  << SU->NodeNum << ") " << TopLatency << "c\n");
2480  }
2481  if (SU->getHeight() > BotLatency) {
2482  BotLatency = SU->getHeight();
2483  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2484  << SU->NodeNum << ") " << BotLatency << "c\n");
2485  }
2486  // If we stall for any reason, bump the cycle.
2487  if (NextCycle > CurrCycle)
2488  bumpCycle(NextCycle);
2489  else
2490  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2491  // resource limited. If a stall occurred, bumpCycle does this.
2492  IsResourceLimited =
2493  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2494  getScheduledLatency(), true);
2495 
2496  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2497  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2498  // one cycle. Since we commonly reach the max MOps here, opportunistically
2499  // bump the cycle to avoid uselessly checking everything in the readyQ.
2500  CurrMOps += IncMOps;
2501 
2502  // Bump the cycle count for issue group constraints.
2503  // This must be done after NextCycle has been adjusted for all other stalls.
2504  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2505  // currCycle to X.
2506  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2507  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2508  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2509  << " group\n");
2510  bumpCycle(++NextCycle);
2511  }
2512 
2513  while (CurrMOps >= SchedModel->getIssueWidth()) {
2514  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2515  << CurrCycle << '\n');
2516  bumpCycle(++NextCycle);
2517  }
2518  LLVM_DEBUG(dumpScheduledState());
2519 }
2520 
2521 /// Release pending ready nodes into the available queue. This makes them
2522 /// visible to heuristics.
2523 void SchedBoundary::releasePending() {
2524  // If the available queue is empty, it is safe to reset MinReadyCycle.
2525  if (Available.empty())
2526  MinReadyCycle = std::numeric_limits<unsigned>::max();
2527 
2528  // Check to see if any of the pending instructions are ready to issue. If
2529  // so, add them to the available queue.
2530  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2531  SUnit *SU = *(Pending.begin() + I);
2532  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2533 
2534  if (ReadyCycle < MinReadyCycle)
2535  MinReadyCycle = ReadyCycle;
2536 
2537  if (Available.size() >= ReadyListLimit)
2538  break;
2539 
2540  releaseNode(SU, ReadyCycle, true, I);
2541  if (E != Pending.size()) {
2542  --I;
2543  --E;
2544  }
2545  }
2546  CheckPending = false;
2547 }
2548 
2549 /// Remove SU from the ready set for this boundary.
2550 void SchedBoundary::removeReady(SUnit *SU) {
2551  if (Available.isInQueue(SU))
2552  Available.remove(Available.find(SU));
2553  else {
2554  assert(Pending.isInQueue(SU) && "bad ready count");
2555  Pending.remove(Pending.find(SU));
2556  }
2557 }
2558 
2559 /// If this queue only has one ready candidate, return it. As a side effect,
2560 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2561 /// one node is ready. If multiple instructions are ready, return NULL.
2562 SUnit *SchedBoundary::pickOnlyChoice() {
2563  if (CheckPending)
2564  releasePending();
2565 
2566  // Defer any ready instrs that now have a hazard.
2567  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2568  if (checkHazard(*I)) {
2569  Pending.push(*I);
2570  I = Available.remove(I);
2571  continue;
2572  }
2573  ++I;
2574  }
2575  for (unsigned i = 0; Available.empty(); ++i) {
2576 // FIXME: Re-enable assert once PR20057 is resolved.
2577 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2578 // "permanent hazard");
2579  (void)i;
2580  bumpCycle(CurrCycle + 1);
2581  releasePending();
2582  }
2583 
2584  LLVM_DEBUG(Pending.dump());
2585  LLVM_DEBUG(Available.dump());
2586 
2587  if (Available.size() == 1)
2588  return *Available.begin();
2589  return nullptr;
2590 }
2591 
2592 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2593 // This is useful information to dump after bumpNode.
2594 // Note that the Queue contents are more useful before pickNodeFromQueue.
2595 void SchedBoundary::dumpScheduledState() const {
2596  unsigned ResFactor;
2597  unsigned ResCount;
2598  if (ZoneCritResIdx) {
2599  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2600  ResCount = getResourceCount(ZoneCritResIdx);
2601  } else {
2602  ResFactor = SchedModel->getMicroOpFactor();
2603  ResCount = RetiredMOps * ResFactor;
2604  }
2605  unsigned LFactor = SchedModel->getLatencyFactor();
2606  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2607  << " Retired: " << RetiredMOps;
2608  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2609  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2610  << ResCount / ResFactor << " "
2611  << SchedModel->getResourceName(ZoneCritResIdx)
2612  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2613  << (IsResourceLimited ? " - Resource" : " - Latency")
2614  << " limited.\n";
2615 }
2616 #endif
2617 
2618 //===----------------------------------------------------------------------===//
2619 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2620 //===----------------------------------------------------------------------===//
2621 
2622 void GenericSchedulerBase::SchedCandidate::
2623 initResourceDelta(const ScheduleDAGMI *DAG,
2624  const TargetSchedModel *SchedModel) {
2625  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2626  return;
2627 
2628  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2629  for (TargetSchedModel::ProcResIter
2630  PI = SchedModel->getWriteProcResBegin(SC),
2631  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2632  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2633  ResDelta.CritResources += PI->Cycles;
2634  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2635  ResDelta.DemandedResources += PI->Cycles;
2636  }
2637 }
2638 
2639 /// Compute remaining latency. We need this both to determine whether the
2640 /// overall schedule has become latency-limited and whether the instructions
2641 /// outside this zone are resource or latency limited.
2642 ///
2643 /// The "dependent" latency is updated incrementally during scheduling as the
2644 /// max height/depth of scheduled nodes minus the cycles since it was
2645 /// scheduled:
2646 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2647 ///
2648 /// The "independent" latency is the max ready queue depth:
2649 /// ILat = max N.depth for N in Available|Pending
2650 ///
2651 /// RemainingLatency is the greater of independent and dependent latency.
2652 ///
2653 /// These computations are expensive, especially in DAGs with many edges, so
2654 /// only do them if necessary.
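/// For example (illustrative numbers only): if the deepest node still sitting
/// in Available or Pending has depth 7, and the deepest node already scheduled
/// in the zone had depth 6 but was issued 2 cycles ago, then ILat = 7 and
/// DLat = 6 - 2 = 4, so RemainingLatency = 7.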
2655 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2656  unsigned RemLatency = CurrZone.getDependentLatency();
2657  RemLatency = std::max(RemLatency,
2658  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2659  RemLatency = std::max(RemLatency,
2660  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2661  return RemLatency;
2662 }
2663 
2664 /// Returns true if the current cycle plus remaining latency is greater than
2665 /// the critical path in the scheduling region.
2666 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2667  SchedBoundary &CurrZone,
2668  bool ComputeRemLatency,
2669  unsigned &RemLatency) const {
2670  // The current cycle is already greater than the critical path, so we are
2671  // already latency limited and don't need to compute the remaining latency.
2672  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2673  return true;
2674 
2675  // If we haven't scheduled anything yet, then we aren't latency limited.
2676  if (CurrZone.getCurrCycle() == 0)
2677  return false;
2678 
2679  if (ComputeRemLatency)
2680  RemLatency = computeRemLatency(CurrZone);
2681 
2682  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2683 }
2684 
2685 /// Set the CandPolicy given a scheduling zone given the current resources and
2686 /// latencies inside and outside the zone.
2687 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2688  SchedBoundary &CurrZone,
2689  SchedBoundary *OtherZone) {
2690  // Apply preemptive heuristics based on the total latency and resources
2691  // inside and outside this zone. Potential stalls should be considered before
2692  // following this policy.
2693 
2694  // Compute the critical resource outside the zone.
2695  unsigned OtherCritIdx = 0;
2696  unsigned OtherCount =
2697  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2698 
2699  bool OtherResLimited = false;
2700  unsigned RemLatency = 0;
2701  bool RemLatencyComputed = false;
2702  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2703  RemLatency = computeRemLatency(CurrZone);
2704  RemLatencyComputed = true;
2705  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2706  OtherCount, RemLatency, false);
2707  }
2708 
2709  // Schedule aggressively for latency in PostRA mode. We don't check for
2710  // acyclic latency during PostRA, and highly out-of-order processors will
2711  // skip PostRA scheduling.
2712  if (!OtherResLimited &&
2713  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2714  RemLatency))) {
2715  Policy.ReduceLatency |= true;
2716  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2717  << " RemainingLatency " << RemLatency << " + "
2718  << CurrZone.getCurrCycle() << "c > CritPath "
2719  << Rem.CriticalPath << "\n");
2720  }
2721  // If the same resource is limiting inside and outside the zone, do nothing.
2722  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2723  return;
2724 
2725  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2726  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2727  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2728  } if (OtherResLimited) dbgs()
2729  << " RemainingLimit: "
2730  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2731  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2732  << " Latency limited both directions.\n");
2733 
2734  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2735  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2736 
2737  if (OtherResLimited)
2738  Policy.DemandResIdx = OtherCritIdx;
2739 }
2740 
2741 #ifndef NDEBUG
2742 const char *GenericSchedulerBase::getReasonStr(
2743  GenericSchedulerBase::CandReason Reason) {
2744  switch (Reason) {
2745  case NoCand: return "NOCAND ";
2746  case Only1: return "ONLY1 ";
2747  case PhysReg: return "PHYS-REG ";
2748  case RegExcess: return "REG-EXCESS";
2749  case RegCritical: return "REG-CRIT ";
2750  case Stall: return "STALL ";
2751  case Cluster: return "CLUSTER ";
2752  case Weak: return "WEAK ";
2753  case RegMax: return "REG-MAX ";
2754  case ResourceReduce: return "RES-REDUCE";
2755  case ResourceDemand: return "RES-DEMAND";
2756  case TopDepthReduce: return "TOP-DEPTH ";
2757  case TopPathReduce: return "TOP-PATH ";
2758  case BotHeightReduce:return "BOT-HEIGHT";
2759  case BotPathReduce: return "BOT-PATH ";
2760  case NextDefUse: return "DEF-USE ";
2761  case NodeOrder: return "ORDER ";
2762  };
2763  llvm_unreachable("Unknown reason!");
2764 }
2765 
2766 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2767  PressureChange P;
2768  unsigned ResIdx = 0;
2769  unsigned Latency = 0;
2770  switch (Cand.Reason) {
2771  default:
2772  break;
2773  case RegExcess:
2774  P = Cand.RPDelta.Excess;
2775  break;
2776  case RegCritical:
2777  P = Cand.RPDelta.CriticalMax;
2778  break;
2779  case RegMax:
2780  P = Cand.RPDelta.CurrentMax;
2781  break;
2782  case ResourceReduce:
2783  ResIdx = Cand.Policy.ReduceResIdx;
2784  break;
2785  case ResourceDemand:
2786  ResIdx = Cand.Policy.DemandResIdx;
2787  break;
2788  case TopDepthReduce:
2789  Latency = Cand.SU->getDepth();
2790  break;
2791  case TopPathReduce:
2792  Latency = Cand.SU->getHeight();
2793  break;
2794  case BotHeightReduce:
2795  Latency = Cand.SU->getHeight();
2796  break;
2797  case BotPathReduce:
2798  Latency = Cand.SU->getDepth();
2799  break;
2800  }
2801  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2802  if (P.isValid())
2803  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2804  << ":" << P.getUnitInc() << " ";
2805  else
2806  dbgs() << " ";
2807  if (ResIdx)
2808  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2809  else
2810  dbgs() << " ";
2811  if (Latency)
2812  dbgs() << " " << Latency << " cycles ";
2813  else
2814  dbgs() << " ";
2815  dbgs() << '\n';
2816 }
2817 #endif
2818 
2819 namespace llvm {
2820 /// Return true if this heuristic determines order.
2821 bool tryLess(int TryVal, int CandVal,
2822  GenericSchedulerBase::SchedCandidate &TryCand,
2823  GenericSchedulerBase::SchedCandidate &Cand,
2824  GenericSchedulerBase::CandReason Reason) {
2825  if (TryVal < CandVal) {
2826  TryCand.Reason = Reason;
2827  return true;
2828  }
2829  if (TryVal > CandVal) {
2830  if (Cand.Reason > Reason)
2831  Cand.Reason = Reason;
2832  return true;
2833  }
2834  return false;
2835 }
2836 
2837 bool tryGreater(int TryVal, int CandVal,
2838  GenericSchedulerBase::SchedCandidate &TryCand,
2839  GenericSchedulerBase::SchedCandidate &Cand,
2840  GenericSchedulerBase::CandReason Reason) {
2841  if (TryVal > CandVal) {
2842  TryCand.Reason = Reason;
2843  return true;
2844  }
2845  if (TryVal < CandVal) {
2846  if (Cand.Reason > Reason)
2847  Cand.Reason = Reason;
2848  return true;
2849  }
2850  return false;
2851 }
2852 
2853 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2854  GenericSchedulerBase::SchedCandidate &Cand,
2855  SchedBoundary &Zone) {
2856  if (Zone.isTop()) {
2857  // Prefer the candidate with the lesser depth, but only if one of them has
2858  // depth greater than the total latency scheduled so far, otherwise either
2859  // of them could be scheduled now with no stall.
2860  if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
2861  Zone.getScheduledLatency()) {
2862  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2863  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2864  return true;
2865  }
2866  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2867  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2868  return true;
2869  } else {
2870  // Prefer the candidate with the lesser height, but only if one of them has
2871  // height greater than the total latency scheduled so far, otherwise either
2872  // of them could be scheduled now with no stall.
2873  if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
2874  Zone.getScheduledLatency()) {
2875  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2876  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2877  return true;
2878  }
2879  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2880  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2881  return true;
2882  }
2883  return false;
2884 }
2885 } // end namespace llvm
2886 
2887 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2888  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2889  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2890 }
2891 
2892 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2893  tracePick(Cand.Reason, Cand.AtTop);
2894 }
2895 
2896 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2897  assert(dag->hasVRegLiveness() &&
2898  "(PreRA)GenericScheduler needs vreg liveness");
2899  DAG = static_cast<ScheduleDAGMILive*>(dag);
2900  SchedModel = DAG->getSchedModel();
2901  TRI = DAG->TRI;
2902 
2903  if (RegionPolicy.ComputeDFSResult)
2904  DAG->computeDFSResult();
2905 
2906  Rem.init(DAG, SchedModel);
2907  Top.init(DAG, SchedModel, &Rem);
2908  Bot.init(DAG, SchedModel, &Rem);
2909 
2910  // Initialize resource counts.
2911 
2912  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2913  // are disabled, then these HazardRecs will be disabled.
2915  if (!Top.HazardRec) {
2916  Top.HazardRec =
2918  Itin, DAG);
2919  }
2920  if (!Bot.HazardRec) {
2921  Bot.HazardRec =
2923  Itin, DAG);
2924  }
2925  TopCand.SU = nullptr;
2926  BotCand.SU = nullptr;
2927 }
2928 
2929 /// Initialize the per-region scheduling policy.
2930 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2931  MachineBasicBlock::iterator End,
2932  unsigned NumRegionInstrs) {
2933  const MachineFunction &MF = *Begin->getMF();
2934  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2935 
2936  // Avoid setting up the register pressure tracker for small regions to save
2937  // compile time. As a rough heuristic, only track pressure when the number of
2938  // schedulable instructions exceeds half the integer register file.
2939  RegionPolicy.ShouldTrackPressure = true;
2940  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2941  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2942  if (TLI->isTypeLegal(LegalIntVT)) {
2943  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2944  TLI->getRegClassFor(LegalIntVT));
2945  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2946  }
2947  }
2948 
2949  // For generic targets, we default to bottom-up, because it's simpler and more
2950  // compile-time optimizations have been implemented in that direction.
2951  RegionPolicy.OnlyBottomUp = true;
2952 
2953  // Allow the subtarget to override default policy.
2954  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2955 
2956  // After subtarget overrides, apply command line options.
2957  if (!EnableRegPressure) {
2958  RegionPolicy.ShouldTrackPressure = false;
2959  RegionPolicy.ShouldTrackLaneMasks = false;
2960  }
2961 
2962  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2963  // e.g. -misched-bottomup=false allows scheduling in both directions.
2964  assert((!ForceTopDown || !ForceBottomUp) &&
2965  "-misched-topdown incompatible with -misched-bottomup");
2966  if (ForceBottomUp.getNumOccurrences() > 0) {
2967  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2968  if (RegionPolicy.OnlyBottomUp)
2969  RegionPolicy.OnlyTopDown = false;
2970  }
2971  if (ForceTopDown.getNumOccurrences() > 0) {
2972  RegionPolicy.OnlyTopDown = ForceTopDown;
2973  if (RegionPolicy.OnlyTopDown)
2974  RegionPolicy.OnlyBottomUp = false;
2975  }
2976 }
2977 
2978 void GenericScheduler::dumpPolicy() const {
2979  // Cannot completely remove virtual function even in release mode.
2980 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2981  dbgs() << "GenericScheduler RegionPolicy: "
2982  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2983  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2984  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2985  << "\n";
2986 #endif
2987 }
2988 
2989 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2990 /// critical path by more cycles than it takes to drain the instruction buffer.
2991 /// We estimate an upper bound on in-flight instructions as:
2992 ///
2993 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2994 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2995 /// InFlightResources = InFlightIterations * LoopResources
2996 ///
2997 /// TODO: Check execution resources in addition to IssueCount.
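/// For example (illustrative numbers only): with a cyclic critical path of 4
/// cycles, a loop body that issues 8 micro-ops, and an acyclic critical path
/// of 16 cycles, roughly 16 / 4 = 4 iterations are in flight, i.e. about 32
/// micro-ops; if the micro-op buffer holds fewer than that, the region is
/// marked acyclic-latency limited.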
2998 void GenericScheduler::checkAcyclicLatency() {
2999  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3000  return;
3001 
3002  // Scaled number of cycles per loop iteration.
3003  unsigned IterCount =
3004  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3005  Rem.RemIssueCount);
3006  // Scaled acyclic critical path.
3007  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3008  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3009  unsigned InFlightCount =
3010  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3011  unsigned BufferLimit =
3012  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3013 
3014  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3015 
3016  LLVM_DEBUG(
3017  dbgs() << "IssueCycles="
3018  << Rem.RemIssueCount / SchedModel->getIssueWidth() << "c "
3019  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3020  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3021  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3022  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3023  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3024 }
3025 
3026 void GenericScheduler::registerRoots() {
3027  Rem.CriticalPath = DAG->ExitSU.getDepth();
3028 
3029  // Some roots may not feed into ExitSU. Check all of them in case.
3030  for (const SUnit *SU : Bot.Available) {
3031  if (SU->getDepth() > Rem.CriticalPath)
3032  Rem.CriticalPath = SU->getDepth();
3033  }
3034  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3035  if (DumpCriticalPathLength) {
3036  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3037  }
3038 
3039  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3040  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3041  checkAcyclicLatency();
3042  }
3043 }
3044 
3045 namespace llvm {
3046 bool tryPressure(const PressureChange &TryP,
3047  const PressureChange &CandP,
3048  GenericSchedulerBase::SchedCandidate &TryCand,
3049  GenericSchedulerBase::SchedCandidate &Cand,
3050  GenericSchedulerBase::CandReason Reason,
3051  const TargetRegisterInfo *TRI,
3052  const MachineFunction &MF) {
3053  // If one candidate decreases and the other increases, go with it.
3054  // Invalid candidates have UnitInc==0.
3055  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3056  Reason)) {
3057  return true;
3058  }
3059  // Do not compare the magnitude of pressure changes between top and bottom
3060  // boundary.
3061  if (Cand.AtTop != TryCand.AtTop)
3062  return false;
3063 
3064  // If both candidates affect the same set in the same boundary, go with the
3065  // smallest increase.
3066  unsigned TryPSet = TryP.getPSetOrMax();
3067  unsigned CandPSet = CandP.getPSetOrMax();
3068  if (TryPSet == CandPSet) {
3069  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3070  Reason);
3071  }
3072 
3073  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3074  std::numeric_limits<int>::max();
3075 
3076  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3077  std::numeric_limits<int>::max();
3078 
3079  // If the candidates are decreasing pressure, reverse priority.
3080  if (TryP.getUnitInc() < 0)
3081  std::swap(TryRank, CandRank);
3082  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3083 }
3084 
3085 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3086  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3087 }
3088 
3089 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3090 /// their physreg def/use.
3091 ///
3092 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3093 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3094 /// with the operation that produces or consumes the physreg. We'll do this when
3095 /// regalloc has support for parallel copies.
3096 int biasPhysReg(const SUnit *SU, bool isTop) {
3097  const MachineInstr *MI = SU->getInstr();
3098 
3099  if (MI->isCopy()) {
3100  unsigned ScheduledOper = isTop ? 1 : 0;
3101  unsigned UnscheduledOper = isTop ? 0 : 1;
3102  // If we have already scheduled the physreg producer/consumer, immediately
3103  // schedule the copy.
3104  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
3105  return 1;
3106  // If the physreg is at the boundary, defer it. Otherwise schedule it
3107  // immediately to free the dependent. We can hoist the copy later.
3108  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3109  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
3110  return AtBoundary ? -1 : 1;
3111  }
3112 
3113  if (MI->isMoveImmediate()) {
3114  // If we have a move immediate and all successors have been assigned, bias
3115  // towards scheduling this later. Make sure all register defs are to
3116  // physical registers.
3117  bool DoBias = true;
3118  for (const MachineOperand &Op : MI->defs()) {
3119  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
3120  DoBias = false;
3121  break;
3122  }
3123  }
3124 
3125  if (DoBias)
3126  return isTop ? -1 : 1;
3127  }
3128 
3129  return 0;
3130 }
3131 } // end namespace llvm
3132 
3133 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3134  bool AtTop,
3135  const RegPressureTracker &RPTracker,
3136  RegPressureTracker &TempTracker) {
3137  Cand.SU = SU;
3138  Cand.AtTop = AtTop;
3139  if (DAG->isTrackingPressure()) {
3140  if (AtTop) {
3141  TempTracker.getMaxDownwardPressureDelta(
3142  Cand.SU->getInstr(),
3143  Cand.RPDelta,
3144  DAG->getRegionCriticalPSets(),
3145  DAG->getRegPressure().MaxSetPressure);
3146  } else {
3147  if (VerifyScheduling) {
3148  TempTracker.getMaxUpwardPressureDelta(
3149  Cand.SU->getInstr(),
3150  &DAG->getPressureDiff(Cand.SU),
3151  Cand.RPDelta,
3152  DAG->getRegionCriticalPSets(),
3153  DAG->getRegPressure().MaxSetPressure);
3154  } else {
3155  RPTracker.getUpwardPressureDelta(
3156  Cand.SU->getInstr(),
3157  DAG->getPressureDiff(Cand.SU),
3158  Cand.RPDelta,
3159  DAG->getRegionCriticalPSets(),
3160  DAG->getRegPressure().MaxSetPressure);
3161  }
3162  }
3163  }
3164  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3165  << " Try SU(" << Cand.SU->NodeNum << ") "
3166  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3167  << Cand.RPDelta.Excess.getUnitInc() << "\n");
3168 }
3169 
3170 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3171 /// hierarchical. This may be more efficient than a graduated cost model because
3172 /// we don't need to evaluate all aspects of the model for each node in the
3173 /// queue. But it's really done to make the heuristics easier to debug and
3174 /// statistically analyze.
3175 ///
3176 /// \param Cand provides the policy and current best candidate.
3177 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3178 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3179 /// if Cand is from a different zone than TryCand.
3180 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
3181  SchedCandidate &TryCand,
3182  SchedBoundary *Zone) const {
3183  // Initialize the candidate if needed.
3184  if (!Cand.isValid()) {
3185  TryCand.Reason = NodeOrder;
3186  return;
3187  }
3188 
3189  // Bias PhysReg defs and copies toward their uses and defs, respectively.
3190  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3191  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3192  return;
3193 
3194  // Avoid exceeding the target's limit.
3195  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3196  Cand.RPDelta.Excess,
3197  TryCand, Cand, RegExcess, TRI,
3198  DAG->MF))
3199  return;
3200 
3201  // Avoid increasing the max critical pressure in the scheduled region.
3202  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3203  Cand.RPDelta.CriticalMax,
3204  TryCand, Cand, RegCritical, TRI,
3205  DAG->MF))
3206  return;
3207 
3208  // We only compare a subset of features when comparing nodes between
3209  // Top and Bottom boundary. Some properties are simply incomparable; in many
3210  // other instances we should only override the other boundary if something
3211  // is a clear good pick on one boundary. Skip heuristics that are more
3212  // "tie-breaking" in nature.
3213  bool SameBoundary = Zone != nullptr;
3214  if (SameBoundary) {
3215  // For loops that are acyclic path limited, aggressively schedule for
3216  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3217  // heuristics to take precedence.
3218  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3219  tryLatency(TryCand, Cand, *Zone))
3220  return;
3221 
3222  // Prioritize instructions that read unbuffered resources by stall cycles.
3223  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3224  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3225  return;
3226  }
3227 
3228  // Keep clustered nodes together to encourage downstream peephole
3229  // optimizations which may reduce resource requirements.
3230  //
3231  // This is a best effort to set things up for a post-RA pass. Optimizations
3232  // like generating loads of multiple registers should ideally be done within
3233  // the scheduler pass by combining the loads during DAG postprocessing.
3234  const SUnit *CandNextClusterSU =
3235  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3236  const SUnit *TryCandNextClusterSU =
3237  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3238  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3239  Cand.SU == CandNextClusterSU,
3240  TryCand, Cand, Cluster))
3241  return;
3242 
3243  if (SameBoundary) {
3244  // Weak edges are for clustering and other constraints.
3245  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3246  getWeakLeft(Cand.SU, Cand.AtTop),
3247  TryCand, Cand, Weak))
3248  return;
3249  }
3250 
3251  // Avoid increasing the max pressure of the entire region.
3252  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3253  Cand.RPDelta.CurrentMax,
3254  TryCand, Cand, RegMax, TRI,
3255  DAG->MF))
3256  return;
3257 
3258  if (SameBoundary) {
3259  // Avoid critical resource consumption and balance the schedule.
3260  TryCand.initResourceDelta(DAG, SchedModel);
3261  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3262  TryCand, Cand, ResourceReduce))
3263  return;
3264  if (tryGreater(TryCand.ResDelta.DemandedResources,
3265  Cand.ResDelta.DemandedResources,
3266  TryCand, Cand, ResourceDemand))
3267  return;
3268 
3269  // Avoid serializing long latency dependence chains.
3270  // For acyclic path limited loops, latency was already checked above.
3271  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3272  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3273  return;
3274 
3275  // Fall through to original instruction order.
3276  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3277  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3278  TryCand.Reason = NodeOrder;
3279  }
3280  }
3281 }
3282 
3283 /// Pick the best candidate from the queue.
3284 ///
3285 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3286 /// DAG building. To adjust for the current scheduling location we need to
3287 /// maintain the number of vreg uses remaining to be top-scheduled.
3288 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3289  const CandPolicy &ZonePolicy,
3290  const RegPressureTracker &RPTracker,
3291  SchedCandidate &Cand) {
3292  // getMaxPressureDelta temporarily modifies the tracker.
3293  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3294 
3295  ReadyQueue &Q = Zone.Available;
3296  for (SUnit *SU : Q) {
3297 
3298  SchedCandidate TryCand(ZonePolicy);
3299  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3300  // Pass SchedBoundary only when comparing nodes from the same boundary.
3301  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3302  tryCandidate(Cand, TryCand, ZoneArg);
3303  if (TryCand.Reason != NoCand) {
3304  // Initialize resource delta if needed in case future heuristics query it.
3305  if (TryCand.ResDelta == SchedResourceDelta())
3306  TryCand.initResourceDelta(DAG, SchedModel);
3307  Cand.setBest(TryCand);
3308  LLVM_DEBUG(traceCandidate(Cand));
3309  }
3310  }
3311 }
3312 
3313 /// Pick the best candidate node from either the top or bottom queue.
3314 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3315  // Schedule as far as possible in the direction of no choice. This is most
3316  // efficient, but also provides the best heuristics for CriticalPSets.
3317  if (SUnit *SU = Bot.pickOnlyChoice()) {
3318  IsTopNode = false;
3319  tracePick(Only1, false);
3320  return SU;
3321  }
3322  if (SUnit *SU = Top.pickOnlyChoice()) {
3323  IsTopNode = true;
3324  tracePick(Only1, true);
3325  return SU;
3326  }
3327  // Set the bottom-up policy based on the state of the current bottom zone and
3328  // the instructions outside the zone, including the top zone.
3329  CandPolicy BotPolicy;
3330  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3331  // Set the top-down policy based on the state of the current top zone and
3332  // the instructions outside the zone, including the bottom zone.
3333  CandPolicy TopPolicy;
3334  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3335 
3336  // See if BotCand is still valid (because we previously scheduled from Top).
3337  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3338  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3339  BotCand.Policy != BotPolicy) {
3340  BotCand.reset(CandPolicy());
3341  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3342  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3343  } else {
3344  LLVM_DEBUG(traceCandidate(BotCand));
3345 #ifndef NDEBUG
3346  if (VerifyScheduling) {
3347  SchedCandidate TCand;
3348  TCand.reset(CandPolicy());
3349  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3350  assert(TCand.SU == BotCand.SU &&
3351  "Last pick result should correspond to re-picking right now");
3352  }
3353 #endif
3354  }
3355 
3356  // Check if the top Q has a better candidate.
3357  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3358  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3359  TopCand.Policy != TopPolicy) {
3360  TopCand.reset(CandPolicy());
3361  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3362  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3363  } else {
3364  LLVM_DEBUG(traceCandidate(TopCand));
3365 #ifndef NDEBUG
3366  if (VerifyScheduling) {
3367  SchedCandidate TCand;
3368  TCand.reset(CandPolicy());
3369  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3370  assert(TCand.SU == TopCand.SU &&
3371  "Last pick result should correspond to re-picking right now");
3372  }
3373 #endif
3374  }
3375 
3376  // Pick best from BotCand and TopCand.
3377  assert(BotCand.isValid());
3378  assert(TopCand.isValid());
3379  SchedCandidate Cand = BotCand;
3380  TopCand.Reason = NoCand;
3381  tryCandidate(Cand, TopCand, nullptr);
3382  if (TopCand.Reason != NoCand) {
3383  Cand.setBest(TopCand);
3384  LLVM_DEBUG(traceCandidate(Cand));
3385  }
3386 
3387  IsTopNode = Cand.AtTop;
3388  tracePick(Cand);
3389  return Cand.SU;
3390 }
3391 
3392 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3393 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3394  if (DAG->top() == DAG->bottom()) {
3395  assert(Top.Available.empty() && Top.Pending.empty() &&
3396  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3397  return nullptr;
3398  }
3399  SUnit *SU;
3400  do {
3401  if (RegionPolicy.OnlyTopDown) {
3402  SU = Top.pickOnlyChoice();
3403  if (!SU) {
3404  CandPolicy NoPolicy;
3405  TopCand.reset(NoPolicy);
3406  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3407  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3408  tracePick(TopCand);
3409  SU = TopCand.SU;
3410  }
3411  IsTopNode = true;
3412  } else if (RegionPolicy.OnlyBottomUp) {
3413  SU = Bot.pickOnlyChoice();
3414  if (!SU) {
3415  CandPolicy NoPolicy;
3416  BotCand.reset(NoPolicy);
3417  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3418  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3419  tracePick(BotCand);
3420  SU = BotCand.SU;
3421  }
3422  IsTopNode = false;
3423  } else {
3424  SU = pickNodeBidirectional(IsTopNode);
3425  }
3426  } while (SU->isScheduled);
3427 
3428  if (SU->isTopReady())
3429  Top.removeReady(SU);
3430  if (SU->isBottomReady())
3431  Bot.removeReady(SU);
3432 
3433  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3434  << *SU->getInstr());
3435  return SU;
3436 }
3437 
3438 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3439  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3440  if (!isTop)
3441  ++InsertPos;
3442  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3443 
3444  // Find already scheduled copies with a single physreg dependence and move
3445  // them just above the scheduled instruction.
3446  for (SDep &Dep : Deps) {
3447  if (Dep.getKind() != SDep::Data ||
3448  !Register::isPhysicalRegister(Dep.getReg()))
3449  continue;
3450  SUnit *DepSU = Dep.getSUnit();
3451  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3452  continue;
3453  MachineInstr *Copy = DepSU->getInstr();
3454  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3455  continue;
3456  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3457  DAG->dumpNode(*Dep.getSUnit()));
3458  DAG->moveInstruction(Copy, InsertPos);
3459  }
3460 }
3461 
3462 /// Update the scheduler's state after scheduling a node. This is the same node
3463 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3464 /// update its state based on the current cycle before MachineSchedStrategy
3465 /// does.
3466 ///
3467 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3468 /// them here. See comments in biasPhysReg.
3469 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3470  if (IsTopNode) {
3471  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3472  Top.bumpNode(SU);
3473  if (SU->hasPhysRegUses)
3474  reschedulePhysReg(SU, true);
3475  } else {
3476  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3477  Bot.bumpNode(SU);
3478  if (SU->hasPhysRegDefs)
3479  reschedulePhysReg(SU, false);
3480  }
3481 }
3482 
3483 /// Create the standard converging machine scheduler. This will be used as the
3484 /// default scheduler if the target does not set a default.
3485 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3486  ScheduleDAGMILive *DAG =
3487  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3488  // Register DAG post-processors.
3489  //
3490  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3491  // data and pass it to later mutations. Have a single mutation that gathers
3492  // the interesting nodes in one pass.
3493  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3494  return DAG;
3495 }
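For comparison, a target-specific factory usually follows the same shape: build a ScheduleDAGMILive with a strategy and append extra DAG mutations. The sketch below assumes the hypothetical MyTargetScheduler shown earlier plus a hypothetical mutation; only addMutation() and the ScheduleDAGMutation::apply() hook are taken from the real API.

namespace {
// Hypothetical postprocessing step; apply() is the only ScheduleDAGMutation hook.
struct MyClusterMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    // Walk DAG->SUnits and add cluster/weak edges with DAG->addEdge() here.
  }
};
} // end anonymous namespace

static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, std::make_unique<MyTargetScheduler>(C));
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(std::make_unique<MyClusterMutation>());
  return DAG;
}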
3496 
3497 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3498  return createGenericSchedLive(C);
3499 }
3500 
3501 static MachineSchedRegistry
3502 GenericSchedRegistry("converge", "Standard converging scheduler.",
3503  createConvergingSched);
3504 
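Registering such a factory is what makes it selectable with -misched=<name>, mirroring the GenericSchedRegistry entry above. A short sketch, reusing the hypothetical createMyTargetSched factory:

static MachineSchedRegistry
MyTargetSchedRegistry("mytarget", "Hypothetical target-specific scheduler.",
                      createMyTargetSched);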
3505 //===----------------------------------------------------------------------===//
3506 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3507 //===----------------------------------------------------------------------===//
3508 
3509 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3510  DAG = Dag;
3511  SchedModel = DAG->getSchedModel();
3512  TRI = DAG->TRI;
3513 
3514  Rem.init(DAG, SchedModel);
3515  Top.init(DAG, SchedModel, &Rem);
3516  BotRoots.clear();
3517 
3518  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3519  // or are disabled, then these HazardRecs will be disabled.
3520  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3521  if (!Top.HazardRec) {
3522  Top.HazardRec =
3523  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3524  Itin, DAG);
3525  }
3526 }
3527 
3528 void PostGenericScheduler::registerRoots() {
3529  Rem.CriticalPath = DAG->ExitSU.getDepth();
3530 
3531  // Some roots may not feed into ExitSU. Check all of them in case.
3532  for (const SUnit *SU : BotRoots) {
3533  if (SU->getDepth() > Rem.CriticalPath)
3534  Rem.CriticalPath = SU->getDepth();
3535  }
3536  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3537  if (DumpCriticalPathLength) {
3538  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3539  }
3540 }
3541 
3542 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3543 ///
3544 /// \param Cand provides the policy and current best candidate.
3545 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3546 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3547  SchedCandidate &TryCand) {
3548  // Initialize the candidate if needed.
3549  if (!Cand.isValid()) {
3550  TryCand.Reason = NodeOrder;
3551  return;
3552  }
3553 
3554  // Prioritize instructions that read unbuffered resources by stall cycles.
3555  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3556  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3557  return;
3558 
3559  // Keep clustered nodes together.
3560  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3561  Cand.SU == DAG->getNextClusterSucc(),
3562  TryCand, Cand, Cluster))
3563  return;
3564 
3565  // Avoid critical resource consumption and balance the schedule.
3566  if (tryLess(TryCand.ResDelta.CriticalResources, Cand.ResDelta.CriticalResources,
3567  TryCand, Cand, ResourceReduce))
3568  return;
3569  if (tryGreater(TryCand.ResDelta.DemandedResources,
3570  Cand.ResDelta.DemandedResources,
3571  TryCand, Cand, ResourceDemand))
3572  return;
3573 
3574  // Avoid serializing long latency dependence chains.
3575  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3576  return;
3577  }
3578 
3579  // Fall through to original instruction order.
3580  if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
3581  TryCand.Reason = NodeOrder;
3582 }
3583 
3584 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3585  ReadyQueue &Q = Top.Available;
3586  for (SUnit *SU : Q) {
3587  SchedCandidate TryCand(Cand.Policy);
3588  TryCand.SU = SU;
3589  TryCand.AtTop = true;
3590  TryCand.initResourceDelta(DAG, SchedModel);
3591  tryCandidate(Cand, TryCand);
3592  if (TryCand.Reason != NoCand) {
3593  Cand.setBest(TryCand);
3594  LLVM_DEBUG(traceCandidate(Cand));
3595  }
3596  }
3597 }
3598 
3599 /// Pick the next node to schedule.
3600 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3601  if (DAG->top() == DAG->bottom()) {
3602  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3603  return nullptr;
3604  }
3605  SUnit *SU;
3606  do {
3607  SU = Top.pickOnlyChoice();
3608  if (SU) {
3609  tracePick(Only1, true);
3610  } else {
3611  CandPolicy NoPolicy;
3612  SchedCandidate TopCand(NoPolicy);
3613  // Set the top-down policy based on the state of the current top zone and
3614  // the instructions outside the zone, including the bottom zone.
3615  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3616  pickNodeFromQueue(TopCand);
3617  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3618  tracePick(TopCand);
3619  SU = TopCand.SU;
3620  }
3621  } while (SU->isScheduled);
3622 
3623  IsTopNode = true;
3624  Top.removeReady(SU);
3625 
3626  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3627  << *SU->getInstr());
3628  return SU;
3629 }
3630 
3631 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3632 /// scheduled/remaining flags in the DAG nodes.
3633 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3634  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3635  Top.bumpNode(SU);
3636 }
3637 
3638 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3639  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3640  /*RemoveKillFlags=*/true);
3641 }
3642 
3643 //===----------------------------------------------------------------------===//
3644 // ILP Scheduler. Currently for experimental analysis of heuristics.
3645 //===----------------------------------------------------------------------===//
3646 
3647 namespace {
3648 
3649 /// Order nodes by the ILP metric.
3650 struct ILPOrder {
3651  const SchedDFSResult *DFSResult = nullptr;
3652  const BitVector *ScheduledTrees = nullptr;
3653  bool MaximizeILP;
3654 
3655  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3656 
3657  /// Apply a less-than relation on node priority.
3658  ///
3659  /// (Return true if A comes after B in the Q.)
3660  bool operator()(const SUnit *A, const SUnit *B) const {
3661  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3662  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3663  if (SchedTreeA != SchedTreeB) {
3664  // Unscheduled trees have lower priority.
3665  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3666  return ScheduledTrees->test(SchedTreeB);
3667 
3668  // Trees with shallower connections have lower priority.
3669  if (DFSResult->getSubtreeLevel(SchedTreeA)
3670  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3671  return DFSResult->getSubtreeLevel(SchedTreeA)
3672  < DFSResult->getSubtreeLevel(SchedTreeB);
3673  }
3674  }
3675  if (MaximizeILP)
3676  return DFSResult->getILP(A) < DFSResult->getILP(B);
3677  else
3678  return DFSResult->getILP(A) > DFSResult->getILP(B);
3679  }
3680 };
3681 
3682 /// Schedule based on the ILP metric.
3683 class ILPScheduler : public MachineSchedStrategy {
3684  ScheduleDAGMILive *DAG = nullptr;
3685  ILPOrder Cmp;
3686 
3687  std::vector<SUnit*> ReadyQ;
3688 
3689 public:
3690  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3691 
3692  void initialize(ScheduleDAGMI *dag) override {
3693  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3694  DAG = static_cast<ScheduleDAGMILive*>(dag);
3695  DAG->computeDFSResult();
3696  Cmp.DFSResult = DAG->getDFSResult();
3697  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3698  ReadyQ.clear();
3699  }
3700 
3701  void registerRoots() override {
3702  // Restore the heap in ReadyQ with the updated DFS results.
3703  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3704  }
3705 
3706  /// Implement MachineSchedStrategy interface.
3707  /// -----------------------------------------
3708 
3709  /// Callback to select the highest priority node from the ready Q.
3710  SUnit *pickNode(bool &IsTopNode) override {
3711  if (ReadyQ.empty()) return nullptr;
3712  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3713  SUnit *SU = ReadyQ.back();
3714  ReadyQ.pop_back();
3715  IsTopNode = false;
3716  LLVM_DEBUG(dbgs() << "Pick node "
3717  << "SU(" << SU->NodeNum << ") "
3718  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3719  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3720  << " @"
3721  << DAG->getDFSResult()->getSubtreeLevel(
3722  DAG->getDFSResult()->getSubtreeID(SU))
3723  << '\n'
3724  << "Scheduling " << *SU->getInstr());
3725  return SU;
3726  }
3727 
3728  /// Scheduler callback to notify that a new subtree is scheduled.
3729  void scheduleTree(unsigned SubtreeID) override {
3730  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3731  }
3732 
3733  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3734  /// DFSResults, and resort the priority Q.
3735  void schedNode(SUnit *SU, bool IsTopNode) override {
3736  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3737  }
3738 
3739  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3740 
3741  void releaseBottomNode(SUnit *SU) override {
3742  ReadyQ.push_back(SU);
3743  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3744  }
3745 };
3746 
3747 } // end anonymous namespace
3748 
3749 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3750  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3751 }
3752 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3753  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3754 }
3755 
3757  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3759  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3760 
3761 //===----------------------------------------------------------------------===//
3762 // Machine Instruction Shuffler for Correctness Testing
3763 //===----------------------------------------------------------------------===//
3764 
3765 #ifndef NDEBUG
3766 namespace {
3767 
3768 /// Apply a less-than relation on the node order, which corresponds to the
3769 /// instruction order prior to scheduling. IsReverse implements greater-than.
3770 template<bool IsReverse>
3771 struct SUnitOrder {
3772  bool operator()(SUnit *A, SUnit *B) const {
3773  if (IsReverse)
3774  return A->NodeNum > B->NodeNum;
3775  else
3776  return A->NodeNum < B->NodeNum;
3777  }
3778 };
3779 
3780 /// Reorder instructions as much as possible.
3781 class InstructionShuffler : public MachineSchedStrategy {
3782  bool IsAlternating;
3783  bool IsTopDown;
3784 
3785  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3786  // gives nodes with a higher number higher priority causing the latest
3787  // instructions to be scheduled first.
3788  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3789  TopQ;
3790 
3791  // When scheduling bottom-up, use greater-than as the queue priority.
3792  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3793  BottomQ;
3794 
3795 public:
3796  InstructionShuffler(bool alternate, bool topdown)
3797  : IsAlternating(alternate), IsTopDown(topdown) {}
3798 
3799  void initialize(ScheduleDAGMI*) override {
3800  TopQ.clear();
3801  BottomQ.clear();
3802  }
3803 
3804  /// Implement MachineSchedStrategy interface.
3805  /// -----------------------------------------
3806 
3807  SUnit *pickNode(bool &IsTopNode) override {
3808  SUnit *SU;
3809  if (IsTopDown) {
3810  do {
3811  if (TopQ.empty()) return nullptr;
3812  SU = TopQ.top();
3813  TopQ.pop();
3814  } while (SU->isScheduled);
3815  IsTopNode = true;
3816  } else {
3817  do {
3818  if (BottomQ.empty()) return nullptr;
3819  SU = BottomQ.top();
3820  BottomQ.pop();
3821  } while (SU->isScheduled);
3822  IsTopNode = false;
3823  }
3824  if (IsAlternating)
3825  IsTopDown = !IsTopDown;
3826  return SU;
3827  }
3828 
3829  void schedNode(SUnit *SU, bool IsTopNode) override {}
3830 
3831  void releaseTopNode(SUnit *SU) override {
3832  TopQ.push(SU);
3833  }
3834  void releaseBottomNode(SUnit *SU) override {
3835  BottomQ.push(SU);
3836  }
3837 };
3838 
3839 } // end anonymous namespace
3840 
3841 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3842  bool Alternate = !ForceTopDown && !ForceBottomUp;
3843  bool TopDown = !ForceBottomUp;
3844  assert((TopDown || !ForceTopDown) &&
3845  "-misched-topdown incompatible with -misched-bottomup");
3846  return new ScheduleDAGMILive(
3847  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3848 }
3849 
3851  "shuffle", "Shuffle machine instructions alternating directions",
3853 #endif // !NDEBUG
3854 
3855 //===----------------------------------------------------------------------===//
3856 // GraphWriter support for ScheduleDAGMILive.
3857 //===----------------------------------------------------------------------===//
3858 
3859 #ifndef NDEBUG
3860 namespace llvm {
3861 
3862 template<> struct GraphTraits<
3863  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3864 
3865 template<>
3866 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3867  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3868 
3869  static std::string getGraphName(const ScheduleDAG *G) {
3870  return std::string(G->MF.getName());
3871  }
3872 
3873  static bool renderGraphFromBottomUp() {
3874  return true;
3875  }
3876 
3877  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
3878  if (ViewMISchedCutoff == 0)
3879  return false;
3880  return (Node->Preds.size() > ViewMISchedCutoff
3881  || Node->Succs.size() > ViewMISchedCutoff);
3882  }
3883 
3884  /// If you want to override the dot attributes printed for a particular
3885  /// edge, override this method.
3886  static std::string getEdgeAttributes(const SUnit *Node,
3887  SUnitIterator EI,
3888  const ScheduleDAG *Graph) {
3889  if (EI.isArtificialDep())
3890  return "color=cyan,style=dashed";
3891  if (EI.isCtrlDep())
3892  return "color=blue,style=dashed";
3893  return "";
3894  }
3895 
3896  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3897  std::string Str;
3898  raw_string_ostream SS(Str);
3899  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3900  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3901  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3902  SS << "SU:" << SU->NodeNum;
3903  if (DFS)
3904  SS << " I:" << DFS->getNumInstrs(SU);
3905  return SS.str();
3906  }
3907 
3908  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3909  return G->getGraphNodeLabel(SU);
3910  }
3911 
3912  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3913  std::string Str("shape=Mrecord");
3914  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3915  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3916  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3917  if (DFS) {
3918  Str += ",style=filled,fillcolor=\"#";
3919  Str += DOT::getColorString(DFS->getSubtreeID(N));
3920  Str += '"';
3921  }
3922  return Str;
3923  }
3924 };
3925 
3926 } // end namespace llvm
3927 #endif // NDEBUG
3928 
3929 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3930 /// rendered using 'dot'.
3931 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3932 #ifndef NDEBUG
3933  ViewGraph(this, Name, false, Title);
3934 #else
3935  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3936  << "systems with Graphviz or gv!\n";
3937 #endif // NDEBUG
3938 }
3939 
3940 /// Out-of-line implementation with no arguments is handy for gdb.
3941 void ScheduleDAGMI::viewGraph() {
3942  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3943 }
llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1087
llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:249
llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350
ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
i
i
Definition: README.txt:29
llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:895
llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:121
llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202
llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:845
llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:96
llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:813
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:812
DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:459
llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:909
llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:594
llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:158
llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:134
llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1706
llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:585
MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:49
llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:944
llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:581
ScheduleDAG.h
llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:893
llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:3941
llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1500
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:616
llvm
Definition: AllocatorList.h:23
llvm::GenericScheduler::tryCandidate
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3180
llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:815
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:624
llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1406
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
TargetFrameLowering.h
llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:263
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:42
llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:544
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:560
llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:164
llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3869
llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:813
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:32
createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3752
ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:128
MachinePassRegistry.h
llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:732
llvm::MachineSchedStrategy::initialize
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:617
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:730
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:277
llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:167
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:327
llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:813
llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:144
ScheduleDAGInstrs.h
llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:141
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:881
llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:994
llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2020
llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2102
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:622
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:118
llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:908
llvm::ForceTopDown
cl::opt< bool > ForceTopDown
llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:554
llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:231
llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342
ErrorHandling.h
llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:2837
llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:311
llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:873
llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3509
InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:1968
llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:813
llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:34
RegisterClassInfo.h
MachineBasicBlock.h
llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:402
priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:296
llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3912
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1012
llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:702
llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
llvm::MemOp
Definition: TargetLowering.h:111
llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1207
llvm::ScheduleDAGMI::postprocessDAG
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:840
getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:487
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:2887
DenseMap.h
llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:2687
llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3026
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:333
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:207
TargetInstrInfo.h
llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:2896
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:65
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::RegisterMaskPair
Definition: RegisterPressure.h:39
llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:733
llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:178
llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)
llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:145
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:867
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1279
llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2344
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:457
EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1299
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:375
llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:279
llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:814
llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:751
STLExtras.h
llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:343
llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3133
llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:417
llvm::MachineSchedStrategy::releaseTopNode
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:125
nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:317
llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90
llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:637
llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:83
llvm::MachineSchedStrategy::schedNode
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:3886
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446
llvm::PostGenericScheduler::tryCandidate
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3546
llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:721
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:280
llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:619
llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:665
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
RegisterPressure.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachineScheduler.cpp:75
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:226
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1396
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:118
MachineRegisterInfo.h
llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:567
llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:273
llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1507
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:377
llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:413
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:32
llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:2978
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:581
llvm::tgtok::Dag
@ Dag
Definition: TGLexer.h:50
llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1183
CommandLine.h
llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:657
llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:420
TargetLowering.h
llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3438
MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:143
llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:702
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:40
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:201
llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleHazardRecognizer.h
llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:70
checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:1976
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1308
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:99
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:410
llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449
llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215
llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:1970
llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:242
EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:741
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1163
llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:99
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:282
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:2853
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3150
llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:813
llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:587
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:933
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
llvm::MachineSchedStrategy::releaseBottomNode
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:846
SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1794
llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:814
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:455
false
Definition: StackSlotColoring.cpp:142
llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:1985
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1115
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3638
llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:471
llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:822
llvm::Instruction
Definition: Instruction.h:45
ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:318
llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:707
createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3749
llvm::RegisterPressure::LiveOutRegs
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Definition: RegisterPressure.h:54
llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:359
llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:582
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:93
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:404
BitVector.h
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:707
llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:154
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:240
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:151
llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:138
llvm::BitVector
Definition: BitVector.h:74
llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
ViewMISchedDAGs
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:680
llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:562
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:52
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2595
llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and set up scheduler queues.
Definition: MachineScheduler.cpp:865
llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:155
llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes into the available queue.
Definition: MachineScheduler.cpp:2523
llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:117
llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:864
llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:408
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:2930
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
Definition: RegisterPressure.cpp:570
llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
Definition: RegisterClassInfo.h:92
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
Definition: DOTGraphTraits.h:161
llvm::ScheduleDAGMI::NumInstrsScheduled
unsigned NumInstrsScheduled
The number of instructions scheduled so far.
Definition: MachineScheduler.h:288
llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has no successors.
Definition: ScheduleDAG.h:406
llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:387
Passes.h
llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)
llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:746
llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:911
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319
llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:556
llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:2821
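As a sketch of how this helper is meant to be used (the free function and the name preferFewerStalls below are illustrative, not APIs of this file): a tryCandidate()-style comparison passes a per-candidate metric for both candidates, and tryLess() records the winning reason and reports whether the order is decided.
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
static bool preferFewerStalls(SchedBoundary &Zone,
                              GenericSchedulerBase::SchedCandidate &TryCand,
                              GenericSchedulerBase::SchedCandidate &Cand) {
  // Lower stall count wins; on a decision, the candidates carry the reason.
  return tryLess(Zone.getLatencyStallCycles(TryCand.SU),
                 Zone.getLatencyStallCycles(Cand.SU),
                 TryCand, Cand, GenericSchedulerBase::Stall);
}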
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:558
llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region.
Definition: RegisterPressure.h:464
llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:853
llvm::cl::opt< bool >
llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
Definition: MachineScheduler.h:860
llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:813
llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419
llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:269
llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx, or nullptr.
Definition: LiveInterval.h:421
TargetSchedule.h
llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:331
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:31
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:56
LiveIntervals.h
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:889
llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:825
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3485
llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:241
llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:618
llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
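The INITIALIZE_PASS_BEGIN / INITIALIZE_PASS_DEPENDENCY / INITIALIZE_PASS_END macros referenced in this index are normally used as one block when registering a pass. A generic illustration follows; MyMachinePass and its dependency list are placeholders, not the registration used by this file:
// Registers MyMachinePass and its required analyses with the PassRegistry.
INITIALIZE_PASS_BEGIN(MyMachinePass, "my-machine-pass",
                      "My Machine Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(MyMachinePass, "my-machine-pass",
                    "My Machine Pass", false, false)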
llvm::MachineSchedStrategy::registerRoots
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.h:239
llvm::MachineSchedStrategy::scheduleTree
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
Definition: MachineScheduler.h:247
llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2040
llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:907
llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3046
llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:272
llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.cpp:3528
llvm::DenseMap
Definition: DenseMap.h:714
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:113
llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2074
ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph; provide a cutoff to hide them.
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
Definition: MachineScheduler.h:120
llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:319
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311
llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:948
llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:710
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:125
llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2331
llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:3867
ArrayRef.h
llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:510
llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:533
TargetPassConfig.h
llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:308
llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:579
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:459
llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:391
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:241
llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:325
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:50
ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
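Registrations such as the one above make a scheduler variant selectable by name through the -misched= command-line option. A minimal sketch of the same pattern, with a hypothetical registry name and factory (only createGenericSchedLive is a real helper here):
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
static ScheduleDAGInstrs *createMyScheduler(MachineSchedContext *C) {
  // Reuse the standard converging scheduler; a real target would customize it.
  return createGenericSchedLive(C);
}
static MachineSchedRegistry
MySchedRegistry("my-sched", "Example: select with -misched=my-sched",
                createMyScheduler);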
llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1344
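A rough usage sketch for the query above (the wrapper function is invented; the tracker is assumed to be initialized and positioned at the top of the region, and the critical-set/limit arrays filled in by the caller, as the scheduler does per region):
#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/RegisterPressure.h"
using namespace llvm;
static RegPressureDelta
computeDownwardDelta(RegPressureTracker &RPTracker, const MachineInstr *MI,
                     ArrayRef<PressureChange> CriticalPSets,
                     ArrayRef<unsigned> MaxPressureLimit) {
  RegPressureDelta Delta;
  // Ask how much issuing MI next (top-down) would raise pressure relative to
  // the given critical sets and limits.
  RPTracker.getMaxDownwardPressureDelta(MI, Delta, CriticalPSets,
                                        MaxPressureLimit);
  // Delta.Excess and Delta.CriticalMax now describe the worst increases.
  return Delta;
}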
llvm::RegisterPressure::LiveInRegs
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:53
llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:392
llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
dump the scheduled Sequence.
Definition: MachineScheduler.cpp:929
llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:30
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1203
llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:139
llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:270
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:861
llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition: MachineScheduler.h:842
iterator_range.h
llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:88
llvm::ScheduleDAG::dumpNodeAll
void dumpNodeAll(const SUnit &SU) const
Definition: ScheduleDAG.cpp:363
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::ScheduleDAGInstrs::DbgValues
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
Definition: ScheduleDAGInstrs.h:248
llvm::TargetSubtargetInfo::overrideSchedPolicy
virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const
Override generic scheduling policy within a region.
Definition: TargetSubtargetInfo.h:225
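The generic scheduler consults this hook when initializing its per-region policy, so a subtarget can bias scheduling decisions. A hedged sketch with a hypothetical subtarget class; the MachineSchedPolicy fields used are the ones declared in MachineScheduler.h:
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
using namespace llvm;
// Hypothetical subtarget: only the scheduling-policy override is shown.
class MyTargetSubtarget : public TargetSubtargetInfo {
public:
  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override {
    Policy.ShouldTrackPressure = true;   // always model register pressure
    if (NumRegionInstrs <= 8)
      Policy.OnlyBottomUp = true;        // tiny regions: single-direction search
  }
};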
llvm::ScheduleDAGMI
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
Definition: MachineScheduler.h:266
llvm::ReadyQueue::getName
StringRef getName() const
Definition: MachineScheduler.h:541
llvm::ScheduleDAGMI::placeDebugValues
void placeDebugValues()
Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
Definition: MachineScheduler.cpp:906
EnablePostRAMachineSched
static cl::opt< bool > EnablePostRAMachineSched("enable-post-misched", cl::desc("Enable the post-ra machine instruction scheduling pass."), cl::init(true), cl::Hidden)
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:115
llvm::PostGenericScheduler::schedNode
void schedNode(SUnit *SU, bool IsTopNode) override
Called after ScheduleDAGMI has scheduled an instruction and updated scheduled/remaining flags in the DAG nodes.
Definition: MachineScheduler.cpp:3633
llvm::SlotIndex::getRegSlot
SlotIndex getRegSlot(bool EC=false) const
Returns the register use/def slot in the current instruction for a normal or early-clobber def.
Definition: SlotIndexes.h:254
llvm::LiveIntervals::getInterval
LiveInterval & getInterval(Register Reg)
Definition: LiveIntervals.h:114
llvm::RegPressureTracker::getMaxUpwardPressureDelta
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)