1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/CodeGen/MachineScheduler.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
35 #include "llvm/CodeGen/Passes.h"
51 #include "llvm/Config/llvm-config.h"
52 #include "llvm/InitializePasses.h"
53 #include "llvm/MC/LaneBitmask.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/CommandLine.h"
56 #include "llvm/Support/Compiler.h"
57 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <iterator>
66 #include <limits>
67 #include <memory>
68 #include <string>
69 #include <tuple>
70 #include <utility>
71 #include <vector>
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "machine-scheduler"
76 
77 STATISTIC(NumClustered, "Number of load/store pairs clustered");
78 
79 namespace llvm {
80 
81 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
82  cl::desc("Force top-down list scheduling"));
83 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
84  cl::desc("Force bottom-up list scheduling"));
85 cl::opt<bool>
86 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
87  cl::desc("Print critical path length to stdout"));
88 
89 cl::opt<bool> VerifyScheduling(
90  "verify-misched", cl::Hidden,
91  cl::desc("Verify machine instrs before and after machine scheduling"));
92 
93 } // end namespace llvm
94 
95 #ifndef NDEBUG
96 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
97  cl::desc("Pop up a window to show MISched dags after they are processed"));
98 
99 /// In some situations a few uninteresting nodes depend on nearly all other
100 /// nodes in the graph; provide a cutoff to hide them.
101 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
102  cl::desc("Hide nodes with more predecessor/successor than cutoff"));
103 
104 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
105  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
106 
107 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
108  cl::desc("Only schedule this function"));
109 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
110  cl::desc("Only schedule this MBB#"));
111 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
112  cl::desc("Print schedule DAGs"));
113 #else
114 static const bool ViewMISchedDAGs = false;
115 static const bool PrintDAGs = false;
116 #endif // NDEBUG
117 
118 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
119 /// size of the ready lists.
120 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
121  cl::desc("Limit ready list to N instructions"), cl::init(256));
122 
123 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
124  cl::desc("Enable register pressure scheduling."), cl::init(true));
125 
126 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
127  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
128 
129 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
130  cl::desc("Enable memop clustering."),
131  cl::init(true));
132 static cl::opt<bool>
133  ForceFastCluster("force-fast-cluster", cl::Hidden,
134  cl::desc("Switch to fast cluster algorithm with the loss "
135  "of some fusion opportunities"),
136  cl::init(false));
137 static cl::opt<unsigned>
138  FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
139  cl::desc("The threshold for fast cluster"),
140  cl::init(1000));
141 
142 // DAG subtrees must have at least this many nodes.
143 static const unsigned MinSubtreeSize = 8;
144 
145 // Pin the vtables to this file.
146 void MachineSchedStrategy::anchor() {}
147 
148 void ScheduleDAGMutation::anchor() {}
149 
150 //===----------------------------------------------------------------------===//
151 // Machine Instruction Scheduling Pass and Registry
152 //===----------------------------------------------------------------------===//
153 
154 MachineSchedContext::MachineSchedContext() {
155  RegClassInfo = new RegisterClassInfo();
156 }
157 
158 MachineSchedContext::~MachineSchedContext() {
159  delete RegClassInfo;
160 }
161 
162 namespace {
163 
164 /// Base class for a machine scheduler class that can run at any point.
165 class MachineSchedulerBase : public MachineSchedContext,
166  public MachineFunctionPass {
167 public:
168  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
169 
170  void print(raw_ostream &O, const Module* = nullptr) const override;
171 
172 protected:
173  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
174 };
175 
176 /// MachineScheduler runs after coalescing and before register allocation.
177 class MachineScheduler : public MachineSchedulerBase {
178 public:
179  MachineScheduler();
180 
181  void getAnalysisUsage(AnalysisUsage &AU) const override;
182 
183  bool runOnMachineFunction(MachineFunction&) override;
184 
185  static char ID; // Class identification, replacement for typeinfo
186 
187 protected:
188  ScheduleDAGInstrs *createMachineScheduler();
189 };
190 
191 /// PostMachineScheduler runs shortly before code emission.
192 class PostMachineScheduler : public MachineSchedulerBase {
193 public:
194  PostMachineScheduler();
195 
196  void getAnalysisUsage(AnalysisUsage &AU) const override;
197 
198  bool runOnMachineFunction(MachineFunction&) override;
199 
200  static char ID; // Class identification, replacement for typeinfo
201 
202 protected:
203  ScheduleDAGInstrs *createPostMachineScheduler();
204 };
205 
206 } // end anonymous namespace
207 
208 char MachineScheduler::ID = 0;
209 
210 char &llvm::MachineSchedulerID = MachineScheduler::ID;
211 
212 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
213  "Machine Instruction Scheduler", false, false)
219 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
220  "Machine Instruction Scheduler", false, false)
221 
222 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
223  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
224 }
225 
226 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
227  AU.setPreservesCFG();
228  AU.addRequired<MachineDominatorTree>();
229  AU.addRequired<MachineLoopInfo>();
230  AU.addRequired<AAResultsWrapperPass>();
231  AU.addRequired<TargetPassConfig>();
232  AU.addRequired<SlotIndexes>();
233  AU.addPreserved<SlotIndexes>();
234  AU.addRequired<LiveIntervals>();
235  AU.addPreserved<LiveIntervals>();
236  MachineFunctionPass::getAnalysisUsage(AU);
237 }
238 
239 char PostMachineScheduler::ID = 0;
240 
241 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
242 
243 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
244  "PostRA Machine Instruction Scheduler", false, false)
248 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
249  "PostRA Machine Instruction Scheduler", false, false)
250 
251 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
252  initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
253 }
254 
255 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
256  AU.setPreservesCFG();
257  AU.addRequired<MachineDominatorTree>();
258  AU.addRequired<MachineLoopInfo>();
259  AU.addRequired<AAResultsWrapperPass>();
260  AU.addRequired<TargetPassConfig>();
261  MachineFunctionPass::getAnalysisUsage(AU);
262 }
263 
264 MachinePassRegistry<MachineSchedRegistry::ScheduleDAGCtor>
265  MachineSchedRegistry::Registry;
266 
267 /// A dummy default scheduler factory indicates whether the scheduler
268 /// is overridden on the command line.
269 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
270  return nullptr;
271 }
272 
273 /// MachineSchedOpt allows command line selection of the scheduler.
274 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
275  RegisterPassParser<MachineSchedRegistry>>
276 MachineSchedOpt("misched",
277  cl::init(&useDefaultMachineSched), cl::Hidden,
278  cl::desc("Machine instruction scheduler to use"));
279 
280 static MachineSchedRegistry
281 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
282  useDefaultMachineSched);
283 
284 static cl::opt<bool> EnableMachineSched(
285  "enable-misched",
286  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
287  cl::Hidden);
288 
289 static cl::opt<bool> EnablePostRAMachineSched(
290  "enable-post-misched",
291  cl::desc("Enable the post-ra machine instruction scheduling pass."),
292  cl::init(true), cl::Hidden);
293 
294 /// Decrement this iterator until reaching the top or a non-debug instr.
295 static MachineBasicBlock::const_iterator
296 priorNonDebug(MachineBasicBlock::const_iterator I,
297  MachineBasicBlock::const_iterator Beg) {
298  assert(I != Beg && "reached the top of the region, cannot decrement");
299  while (--I != Beg) {
300  if (!I->isDebugOrPseudoInstr())
301  break;
302  }
303  return I;
304 }
305 
306 /// Non-const version.
307 static MachineBasicBlock::iterator
308 priorNonDebug(MachineBasicBlock::iterator I,
309  MachineBasicBlock::const_iterator Beg) {
310  return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
311  .getNonConstIterator();
312 }
313 
314 /// If this iterator is a debug value, increment until reaching the End or a
315 /// non-debug instruction.
316 static MachineBasicBlock::const_iterator
317 nextIfDebug(MachineBasicBlock::const_iterator I,
318  MachineBasicBlock::const_iterator End) {
319  for(; I != End; ++I) {
320  if (!I->isDebugOrPseudoInstr())
321  break;
322  }
323  return I;
324 }
325 
326 /// Non-const version.
327 static MachineBasicBlock::iterator
328 nextIfDebug(MachineBasicBlock::iterator I,
329  MachineBasicBlock::const_iterator End) {
330  return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
331  .getNonConstIterator();
332 }
333 
334 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
335 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
336  // Select the scheduler, or set the default.
337  MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
338  if (Ctor != useDefaultMachineSched)
339  return Ctor(this);
340 
341  // Get the default scheduler set by the target for this function.
342  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
343  if (Scheduler)
344  return Scheduler;
345 
346  // Default to GenericScheduler.
347  return createGenericSchedLive(this);
348 }
349 
350 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
351 /// the caller. We don't have a command line option to override the postRA
352 /// scheduler. The Target must configure it.
353 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
354  // Get the postRA scheduler set by the target for this function.
355  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
356  if (Scheduler)
357  return Scheduler;
358 
359  // Default to GenericScheduler.
360  return createGenericSchedPostRA(this);
361 }
362 
363 /// Top-level MachineScheduler pass driver.
364 ///
365 /// Visit blocks in function order. Divide each block into scheduling regions
366 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
367 /// consistent with the DAG builder, which traverses the interior of the
368 /// scheduling regions bottom-up.
369 ///
370 /// This design avoids exposing scheduling boundaries to the DAG builder,
371 /// simplifying the DAG builder's support for "special" target instructions.
372 /// At the same time the design allows target schedulers to operate across
373 /// scheduling boundaries, for example to bundle the boundary instructions
374 /// without reordering them. This creates complexity, because the target
375 /// scheduler must update the RegionBegin and RegionEnd positions cached by
376 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
377 /// design would be to split blocks at scheduling boundaries, but LLVM has a
378 /// general bias against block splitting purely for implementation simplicity.
379 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
380  if (skipFunction(mf.getFunction()))
381  return false;
382 
383  if (EnableMachineSched.getNumOccurrences()) {
384  if (!EnableMachineSched)
385  return false;
386  } else if (!mf.getSubtarget().enableMachineScheduler())
387  return false;
388 
389  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
390 
391  // Initialize the context of the pass.
392  MF = &mf;
393  MLI = &getAnalysis<MachineLoopInfo>();
394  MDT = &getAnalysis<MachineDominatorTree>();
395  PassConfig = &getAnalysis<TargetPassConfig>();
396  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
397 
398  LIS = &getAnalysis<LiveIntervals>();
399 
400  if (VerifyScheduling) {
401  LLVM_DEBUG(LIS->dump());
402  MF->verify(this, "Before machine scheduling.");
403  }
404  RegClassInfo->runOnMachineFunction(*MF);
405 
406  // Instantiate the selected scheduler for this target, function, and
407  // optimization level.
408  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
409  scheduleRegions(*Scheduler, false);
410 
411  LLVM_DEBUG(LIS->dump());
412  if (VerifyScheduling)
413  MF->verify(this, "After machine scheduling.");
414  return true;
415 }
416 
417 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
418  if (skipFunction(mf.getFunction()))
419  return false;
420 
421  if (EnablePostRAMachineSched.getNumOccurrences()) {
422  if (!EnablePostRAMachineSched)
423  return false;
424  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
425  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
426  return false;
427  }
428  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
429 
430  // Initialize the context of the pass.
431  MF = &mf;
432  MLI = &getAnalysis<MachineLoopInfo>();
433  PassConfig = &getAnalysis<TargetPassConfig>();
434  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
435 
436  if (VerifyScheduling)
437  MF->verify(this, "Before post machine scheduling.");
438 
439  // Instantiate the selected scheduler for this target, function, and
440  // optimization level.
441  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
442  scheduleRegions(*Scheduler, true);
443 
444  if (VerifyScheduling)
445  MF->verify(this, "After post machine scheduling.");
446  return true;
447 }
448 
449 /// Return true if the given instruction should not be included in a scheduling
450 /// region.
451 ///
452 /// MachineScheduler does not currently support scheduling across calls. To
453 /// handle calls, the DAG builder needs to be modified to create register
454 /// anti/output dependencies on the registers clobbered by the call's regmask
455 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
456 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
457 /// the boundary, but there would be no benefit to postRA scheduling across
458 /// calls this late anyway.
459 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
460  MachineBasicBlock *MBB,
461  MachineFunction *MF,
462  const TargetInstrInfo *TII) {
463  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
464 }
465 
466 /// A region of an MBB for scheduling.
467 namespace {
468 struct SchedRegion {
469  /// RegionBegin is the first instruction in the scheduling region, and
470  /// RegionEnd is either MBB->end() or the scheduling boundary after the
471  /// last instruction in the scheduling region. These iterators cannot refer
472  /// to instructions outside of the identified scheduling region because
473  /// those may be reordered before scheduling this region.
474  MachineBasicBlock::iterator RegionBegin;
475  MachineBasicBlock::iterator RegionEnd;
476  unsigned NumRegionInstrs;
477 
478  SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
479  unsigned N) :
480  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
481 };
482 } // end anonymous namespace
483 
484 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
485 
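/// Partition MBB into scheduling regions by walking backward from the block's
/// end and splitting at each scheduling boundary (see isSchedBoundary above).
/// Regions that contain only debug instructions are not recorded.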
486 static void
487 getSchedRegions(MachineBasicBlock *MBB,
488  MBBRegionsVector &Regions,
489  bool RegionsTopDown) {
490  MachineFunction *MF = MBB->getParent();
491  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
492 
493  MachineBasicBlock::iterator I = nullptr;
494  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
495  RegionEnd != MBB->begin(); RegionEnd = I) {
496 
497  // Avoid decrementing RegionEnd for blocks with no terminator.
498  if (RegionEnd != MBB->end() ||
499  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
500  --RegionEnd;
501  }
502 
503  // The next region starts above the previous region. Look backward in the
504  // instruction stream until we find the nearest boundary.
505  unsigned NumRegionInstrs = 0;
506  I = RegionEnd;
507  for (;I != MBB->begin(); --I) {
508  MachineInstr &MI = *std::prev(I);
509  if (isSchedBoundary(&MI, &*MBB, MF, TII))
510  break;
511  if (!MI.isDebugOrPseudoInstr()) {
512  // MBB::size() uses instr_iterator to count. Here we need a bundle to
513  // count as a single instruction.
514  ++NumRegionInstrs;
515  }
516  }
517 
518  // It's possible we found a scheduling region that only has debug
519  // instructions. Don't bother scheduling these.
520  if (NumRegionInstrs != 0)
521  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
522  }
523 
524  if (RegionsTopDown)
525  std::reverse(Regions.begin(), Regions.end());
526 }
527 
528 /// Main driver for both MachineScheduler and PostMachineScheduler.
529 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
530  bool FixKillFlags) {
531  // Visit all machine basic blocks.
532  //
533  // TODO: Visit blocks in global postorder or postorder within the bottom-up
534  // loop tree. Then we can optionally compute global RegPressure.
535  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
536  MBB != MBBEnd; ++MBB) {
537 
538  Scheduler.startBlock(&*MBB);
539 
540 #ifndef NDEBUG
541  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
542  continue;
543  if (SchedOnlyBlock.getNumOccurrences()
544  && (int)SchedOnlyBlock != MBB->getNumber())
545  continue;
546 #endif
547 
548  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
549  // points to the scheduling boundary at the bottom of the region. The DAG
550  // does not include RegionEnd, but the region does (i.e. the next
551  // RegionEnd is above the previous RegionBegin). If the current block has
552  // no terminator then RegionEnd == MBB->end() for the bottom region.
553  //
554  // All the regions of MBB are first found and stored in MBBRegions, which
555  // will be processed (MBB) top-down if initialized with true.
556  //
557  // The Scheduler may insert instructions during either schedule() or
558  // exitRegion(), even for empty regions. So the local iterators 'I' and
559  // 'RegionEnd' are invalid across these calls. Instructions must not be
560  // added to other regions than the current one without updating MBBRegions.
561 
562  MBBRegionsVector MBBRegions;
563  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
564  for (MBBRegionsVector::iterator R = MBBRegions.begin();
565  R != MBBRegions.end(); ++R) {
566  MachineBasicBlock::iterator I = R->RegionBegin;
567  MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
568  unsigned NumRegionInstrs = R->NumRegionInstrs;
569 
570  // Notify the scheduler of the region, even if we may skip scheduling
571  // it. Perhaps it still needs to be bundled.
572  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
573 
574  // Skip empty scheduling regions (0 or 1 schedulable instructions).
575  if (I == RegionEnd || I == std::prev(RegionEnd)) {
576  // Close the current region. Bundle the terminator if needed.
577  // This invalidates 'RegionEnd' and 'I'.
578  Scheduler.exitRegion();
579  continue;
580  }
581  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
582  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
583  << " " << MBB->getName() << "\n From: " << *I
584  << " To: ";
585  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
586  else dbgs() << "End\n";
587  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
588  if (DumpCriticalPathLength) {
589  errs() << MF->getName();
590  errs() << ":%bb. " << MBB->getNumber();
591  errs() << " " << MBB->getName() << " \n";
592  }
593 
594  // Schedule a region: possibly reorder instructions.
595  // This invalidates the original region iterators.
596  Scheduler.schedule();
597 
598  // Close the current region.
599  Scheduler.exitRegion();
600  }
601  Scheduler.finishBlock();
602  // FIXME: Ideally, no further passes should rely on kill flags. However,
603  // thumb2 size reduction is currently an exception, so the PostMIScheduler
604  // needs to do this.
605  if (FixKillFlags)
606  Scheduler.fixupKills(*MBB);
607  }
608  Scheduler.finalizeSchedule();
609 }
610 
611 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
612  // unimplemented
613 }
614 
615 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
616 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
617  dbgs() << "Queue " << Name << ": ";
618  for (const SUnit *SU : Queue)
619  dbgs() << SU->NodeNum << " ";
620  dbgs() << "\n";
621 }
622 #endif
623 
624 //===----------------------------------------------------------------------===//
625 // ScheduleDAGMI - Basic machine instruction scheduling. This is
626 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
627 // virtual registers.
628 //===----------------------------------------------------------------------===//
629 
630 // Provide a vtable anchor.
631 ScheduleDAGMI::~ScheduleDAGMI() = default;
632 
633 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
634 /// NumPredsLeft reaches zero, release the successor node.
635 ///
636 /// FIXME: Adjust SuccSU height based on MinLatency.
637 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
638  SUnit *SuccSU = SuccEdge->getSUnit();
639 
640  if (SuccEdge->isWeak()) {
641  --SuccSU->WeakPredsLeft;
642  if (SuccEdge->isCluster())
643  NextClusterSucc = SuccSU;
644  return;
645  }
646 #ifndef NDEBUG
647  if (SuccSU->NumPredsLeft == 0) {
648  dbgs() << "*** Scheduling failed! ***\n";
649  dumpNode(*SuccSU);
650  dbgs() << " has been released too many times!\n";
651  llvm_unreachable(nullptr);
652  }
653 #endif
654  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
655  // CurrCycle may have advanced since then.
656  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
657  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
658 
659  --SuccSU->NumPredsLeft;
660  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
661  SchedImpl->releaseTopNode(SuccSU);
662 }
663 
664 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
665 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
666  for (SDep &Succ : SU->Succs)
667  releaseSucc(SU, &Succ);
668 }
669 
670 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
671 /// NumSuccsLeft reaches zero, release the predecessor node.
672 ///
673 /// FIXME: Adjust PredSU height based on MinLatency.
674 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
675  SUnit *PredSU = PredEdge->getSUnit();
676 
677  if (PredEdge->isWeak()) {
678  --PredSU->WeakSuccsLeft;
679  if (PredEdge->isCluster())
680  NextClusterPred = PredSU;
681  return;
682  }
683 #ifndef NDEBUG
684  if (PredSU->NumSuccsLeft == 0) {
685  dbgs() << "*** Scheduling failed! ***\n";
686  dumpNode(*PredSU);
687  dbgs() << " has been released too many times!\n";
688  llvm_unreachable(nullptr);
689  }
690 #endif
691  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
692  // CurrCycle may have advanced since then.
693  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
694  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
695 
696  --PredSU->NumSuccsLeft;
697  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
698  SchedImpl->releaseBottomNode(PredSU);
699 }
700 
701 /// releasePredecessors - Call releasePred on each of SU's predecessors.
702 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
703  for (SDep &Pred : SU->Preds)
704  releasePred(SU, &Pred);
705 }
706 
707 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
708  ScheduleDAGInstrs::startBlock(bb);
709  SchedImpl->enterMBB(bb);
710 }
711 
712 void ScheduleDAGMI::finishBlock() {
713  SchedImpl->leaveMBB();
714  ScheduleDAGInstrs::finishBlock();
715 }
716 
717 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
718 /// crossing a scheduling boundary. [begin, end) includes all instructions in
719 /// the region, including the boundary itself and single-instruction regions
720 /// that don't get scheduled.
721 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
722  MachineBasicBlock::iterator begin,
723  MachineBasicBlock::iterator end,
724  unsigned regioninstrs)
725 {
726  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
727 
728  SchedImpl->initPolicy(begin, end, regioninstrs);
729 }
730 
731 /// This is normally called from the main scheduler loop but may also be invoked
732 /// by the scheduling strategy to perform additional code motion.
733 void ScheduleDAGMI::moveInstruction(
734  MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
735  // Advance RegionBegin if the first instruction moves down.
736  if (&*RegionBegin == MI)
737  ++RegionBegin;
738 
739  // Update the instruction stream.
740  BB->splice(InsertPos, BB, MI);
741 
742  // Update LiveIntervals
743  if (LIS)
744  LIS->handleMove(*MI, /*UpdateFlags=*/true);
745 
746  // Recede RegionBegin if an instruction moves above the first.
747  if (RegionBegin == InsertPos)
748  RegionBegin = MI;
749 }
750 
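/// Give up scheduling further instructions in this region once the debug-only
/// -misched-cutoff limit has been reached; returns false in that case.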
751 bool ScheduleDAGMI::checkSchedLimit() {
752 #ifndef NDEBUG
753  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
754  CurrentTop = CurrentBottom;
755  return false;
756  }
757  ++NumInstrsScheduled;
758 #endif
759  return true;
760 }
761 
762 /// Per-region scheduling driver, called back from
763 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
764 /// does not consider liveness or register pressure. It is useful for PostRA
765 /// scheduling and potentially other custom schedulers.
766 void ScheduleDAGMI::schedule() {
767  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
768  LLVM_DEBUG(SchedImpl->dumpPolicy());
769 
770  // Build the DAG.
771  buildSchedGraph(AA);
772 
773  postprocessDAG();
774 
775  SmallVector<SUnit*, 8> TopRoots, BotRoots;
776  findRootsAndBiasEdges(TopRoots, BotRoots);
777 
778  LLVM_DEBUG(dump());
779  if (PrintDAGs) dump();
780  if (ViewMISchedDAGs) viewGraph();
781 
782  // Initialize the strategy before modifying the DAG.
783  // This may initialize a DFSResult to be used for queue priority.
784  SchedImpl->initialize(this);
785 
786  // Initialize ready queues now that the DAG and priority data are finalized.
787  initQueues(TopRoots, BotRoots);
788 
789  bool IsTopNode = false;
790  while (true) {
791  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
792  SUnit *SU = SchedImpl->pickNode(IsTopNode);
793  if (!SU) break;
794 
795  assert(!SU->isScheduled && "Node already scheduled");
796  if (!checkSchedLimit())
797  break;
798 
799  MachineInstr *MI = SU->getInstr();
800  if (IsTopNode) {
801  assert(SU->isTopReady() && "node still has unscheduled dependencies");
802  if (&*CurrentTop == MI)
803  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
804  else
805  moveInstruction(MI, CurrentTop);
806  } else {
807  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
808  MachineBasicBlock::iterator priorII =
809  priorNonDebug(CurrentBottom, CurrentTop);
810  if (&*priorII == MI)
811  CurrentBottom = priorII;
812  else {
813  if (&*CurrentTop == MI)
814  CurrentTop = nextIfDebug(++CurrentTop, priorII);
815  moveInstruction(MI, CurrentBottom);
816  CurrentBottom = MI;
817  }
818  }
819  // Notify the scheduling strategy before updating the DAG.
820  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
821  // runs, it can then use the accurate ReadyCycle time to determine whether
822  // newly released nodes can move to the readyQ.
823  SchedImpl->schedNode(SU, IsTopNode);
824 
825  updateQueues(SU, IsTopNode);
826  }
827  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
828 
829  placeDebugValues();
830 
831  LLVM_DEBUG({
832  dbgs() << "*** Final schedule for "
833  << printMBBReference(*begin()->getParent()) << " ***\n";
834  dumpSchedule();
835  dbgs() << '\n';
836  });
837 }
838 
839 /// Apply each ScheduleDAGMutation step in order.
840 void ScheduleDAGMI::postprocessDAG() {
841  for (auto &m : Mutations)
842  m->apply(this);
843 }
844 
845 void ScheduleDAGMI::
846 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
847  SmallVectorImpl<SUnit*> &BotRoots) {
848  for (SUnit &SU : SUnits) {
849  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
850 
851  // Order predecessors so DFSResult follows the critical path.
852  SU.biasCriticalPath();
853 
854  // A SUnit is ready to top schedule if it has no predecessors.
855  if (!SU.NumPredsLeft)
856  TopRoots.push_back(&SU);
857  // A SUnit is ready to bottom schedule if it has no successors.
858  if (!SU.NumSuccsLeft)
859  BotRoots.push_back(&SU);
860  }
861  ExitSU.biasCriticalPath();
862 }
863 
864 /// Identify DAG roots and setup scheduler queues.
865 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
866  ArrayRef<SUnit*> BotRoots) {
867  NextClusterSucc = nullptr;
868  NextClusterPred = nullptr;
869 
870  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
871  //
872  // Nodes with unreleased weak edges can still be roots.
873  // Release top roots in forward order.
874  for (SUnit *SU : TopRoots)
875  SchedImpl->releaseTopNode(SU);
876 
877  // Release bottom roots in reverse order so the higher priority nodes appear
878  // first. This is more natural and slightly more efficient.
879  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
880  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
881  SchedImpl->releaseBottomNode(*I);
882  }
883 
884  releaseSuccessors(&EntrySU);
885  releasePredecessors(&ExitSU);
886 
887  SchedImpl->registerRoots();
888 
889  // Advance past initial DebugValues.
890  CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
891  CurrentBottom = RegionEnd;
892 }
893 
894 /// Update scheduler queues after scheduling an instruction.
895 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
896  // Release dependent instructions for scheduling.
897  if (IsTopNode)
898  releaseSuccessors(SU);
899  else
900  releasePredecessors(SU);
901 
902  SU->isScheduled = true;
903 }
904 
905 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
906 void ScheduleDAGMI::placeDebugValues() {
907  // If first instruction was a DBG_VALUE then put it back.
908  if (FirstDbgValue) {
909  BB->splice(RegionBegin, BB, FirstDbgValue);
910  RegionBegin = FirstDbgValue;
911  }
912 
913  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
914  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
915  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
916  MachineInstr *DbgValue = P.first;
917  MachineBasicBlock::iterator OrigPrevMI = P.second;
918  if (&*RegionBegin == DbgValue)
919  ++RegionBegin;
920  BB->splice(++OrigPrevMI, BB, DbgValue);
921  if (OrigPrevMI == std::prev(RegionEnd))
922  RegionEnd = DbgValue;
923  }
924  DbgValues.clear();
925  FirstDbgValue = nullptr;
926 }
927 
928 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
929 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
930  for (MachineInstr &MI : *this) {
931  if (SUnit *SU = getSUnit(&MI))
932  dumpNode(*SU);
933  else
934  dbgs() << "Missing SUnit\n";
935  }
936 }
937 #endif
938 
939 //===----------------------------------------------------------------------===//
940 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
941 // preservation.
942 //===----------------------------------------------------------------------===//
943 
944 ScheduleDAGMILive::~ScheduleDAGMILive() {
945  delete DFSResult;
946 }
947 
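/// Record each virtual register read by SU in VRegUses so that later
/// pressure-diff updates can quickly find all local users of a vreg.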
948 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
949  const MachineInstr &MI = *SU.getInstr();
950  for (const MachineOperand &MO : MI.operands()) {
951  if (!MO.isReg())
952  continue;
953  if (!MO.readsReg())
954  continue;
955  if (TrackLaneMasks && !MO.isUse())
956  continue;
957 
958  Register Reg = MO.getReg();
959  if (!Register::isVirtualRegister(Reg))
960  continue;
961 
962  // Ignore re-defs.
963  if (TrackLaneMasks) {
964  bool FoundDef = false;
965  for (const MachineOperand &MO2 : MI.operands()) {
966  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
967  FoundDef = true;
968  break;
969  }
970  }
971  if (FoundDef)
972  continue;
973  }
974 
975  // Record this local VReg use.
976  VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
977  for (; UI != VRegUses.end(); ++UI) {
978  if (UI->SU == &SU)
979  break;
980  }
981  if (UI == VRegUses.end())
982  VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
983  }
984 }
985 
986 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
987 /// crossing a scheduling boundary. [begin, end) includes all instructions in
988 /// the region, including the boundary itself and single-instruction regions
989 /// that don't get scheduled.
990 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
991  MachineBasicBlock::iterator begin,
992  MachineBasicBlock::iterator end,
993  unsigned regioninstrs)
994 {
995  // ScheduleDAGMI initializes SchedImpl's per-region policy.
996  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
997 
998  // For convenience remember the end of the liveness region.
999  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1000 
1001  SUPressureDiffs.clear();
1002 
1003  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1004  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1005 
1006  assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
1007  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1008 }
1009 
1010 // Setup the register pressure trackers for the top scheduled and bottom
1011 // scheduled regions.
1012 void ScheduleDAGMILive::initRegPressure() {
1013  VRegUses.clear();
1014  VRegUses.setUniverse(MRI.getNumVirtRegs());
1015  for (SUnit &SU : SUnits)
1016  collectVRegUses(SU);
1017 
1018  TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
1019  ShouldTrackLaneMasks, false);
1020  BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1021  ShouldTrackLaneMasks, false);
1022 
1023  // Close the RPTracker to finalize live ins.
1024  RPTracker.closeRegion();
1025 
1026  LLVM_DEBUG(RPTracker.dump());
1027 
1028  // Initialize the live ins and live outs.
1029  TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
1030  BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
1031 
1032  // Close one end of the tracker so we can call
1033  // getMaxUpward/DownwardPressureDelta before advancing across any
1034  // instructions. This converts currently live regs into live ins/outs.
1035  TopRPTracker.closeTop();
1036  BotRPTracker.closeBottom();
1037 
1038  BotRPTracker.initLiveThru(RPTracker);
1039  if (!BotRPTracker.getLiveThru().empty()) {
1040  BotRPTracker.setLiveThru(RPTracker.getLiveThru());
1041  LLVM_DEBUG(dbgs() << "Live Thru: ";
1042  dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
1043  };
1044 
1045  // For each live out vreg reduce the pressure change associated with other
1046  // uses of the same vreg below the live-out reaching def.
1047  updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
1048 
1049  // Account for liveness generated by the region boundary.
1050  if (LiveRegionEnd != RegionEnd) {
1051  SmallVector<RegisterMaskPair, 8> LiveUses;
1052  BotRPTracker.recede(&LiveUses);
1053  updatePressureDiffs(LiveUses);
1054  }
1055 
1056  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1057  dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
1058  dbgs() << "Bottom Pressure:\n";
1059  dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
1060 
1061  assert((BotRPTracker.getPos() == RegionEnd ||
1062  (RegionEnd->isDebugInstr() &&
1063  BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
1064  "Can't find the region bottom");
1065 
1066  // Cache the list of excess pressure sets in this region. This will also track
1067  // the max pressure in the scheduled code for these sets.
1068  RegionCriticalPSets.clear();
1069  const std::vector<unsigned> &RegionPressure =
1070  RPTracker.getPressure().MaxSetPressure;
1071  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1072  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1073  if (RegionPressure[i] > Limit) {
1074  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1075  << " Actual " << RegionPressure[i] << "\n");
1076  RegionCriticalPSets.push_back(PressureChange(i));
1077  }
1078  }
1079  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1080  for (const PressureChange &RCPS
1081  : RegionCriticalPSets) dbgs()
1082  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1083  dbgs() << "\n");
1084 }
1085 
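/// Track the maximum pressure reached in the scheduled code for each critical
/// pressure set of this region, and report sets that are near or above their
/// limit.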
1086 void ScheduleDAGMILive::
1087 updateScheduledPressure(const SUnit *SU,
1088  const std::vector<unsigned> &NewMaxPressure) {
1089  const PressureDiff &PDiff = getPressureDiff(SU);
1090  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1091  for (const PressureChange &PC : PDiff) {
1092  if (!PC.isValid())
1093  break;
1094  unsigned ID = PC.getPSet();
1095  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1096  ++CritIdx;
1097  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1098  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1099  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1100  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1101  }
1102  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1103  if (NewMaxPressure[ID] >= Limit - 2) {
1104  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1105  << NewMaxPressure[ID]
1106  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1107  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1108  << " livethru)\n");
1109  }
1110  }
1111 }
1112 
1113 /// Update the PressureDiff array for liveness after scheduling this
1114 /// instruction.
1115 void ScheduleDAGMILive::updatePressureDiffs(
1116  ArrayRef<RegisterMaskPair> LiveUses) {
1117  for (const RegisterMaskPair &P : LiveUses) {
1118  Register Reg = P.RegUnit;
1119  /// FIXME: Currently assuming single-use physregs.
1120  if (!Register::isVirtualRegister(Reg))
1121  continue;
1122 
1123  if (ShouldTrackLaneMasks) {
1124  // If the register has just become live then other uses won't change
1125  // this fact anymore => decrement pressure.
1126  // If the register has just become dead then other uses make it come
1127  // back to life => increment pressure.
1128  bool Decrement = P.LaneMask.any();
1129 
1130  for (const VReg2SUnit &V2SU
1131  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1132  SUnit &SU = *V2SU.SU;
1133  if (SU.isScheduled || &SU == &ExitSU)
1134  continue;
1135 
1136  PressureDiff &PDiff = getPressureDiff(&SU);
1137  PDiff.addPressureChange(Reg, Decrement, &MRI);
1138  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1139  << printReg(Reg, TRI) << ':'
1140  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1141  dbgs() << " to "; PDiff.dump(*TRI););
1142  }
1143  } else {
1144  assert(P.LaneMask.any());
1145  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1146  // This may be called before CurrentBottom has been initialized. However,
1147  // BotRPTracker must have a valid position. We want the value live into the
1148  // instruction or live out of the block, so ask for the previous
1149  // instruction's live-out.
1150  const LiveInterval &LI = LIS->getInterval(Reg);
1151  VNInfo *VNI;
1152  MachineBasicBlock::const_iterator I =
1153  nextIfDebug(BotRPTracker.getPos(), BB->end());
1154  if (I == BB->end())
1155  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1156  else {
1157  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
1158  VNI = LRQ.valueIn();
1159  }
1160  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1161  assert(VNI && "No live value at use.");
1162  for (const VReg2SUnit &V2SU
1163  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1164  SUnit *SU = V2SU.SU;
1165  // If this use comes before the reaching def, it cannot be a last use,
1166  // so decrease its pressure change.
1167  if (!SU->isScheduled && SU != &ExitSU) {
1168  LiveQueryResult LRQ =
1169  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1170  if (LRQ.valueIn() == VNI) {
1171  PressureDiff &PDiff = getPressureDiff(SU);
1172  PDiff.addPressureChange(Reg, true, &MRI);
1173  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1174  << *SU->getInstr();
1175  dbgs() << " to "; PDiff.dump(*TRI););
1176  }
1177  }
1178  }
1179  }
1180  }
1181 }
1182 
1183 void ScheduleDAGMILive::dump() const {
1184 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1185  if (EntrySU.getInstr() != nullptr)
1186  dumpNodeAll(EntrySU);
1187  for (const SUnit &SU : SUnits) {
1188  dumpNodeAll(SU);
1189  if (ShouldTrackPressure) {
1190  dbgs() << " Pressure Diff : ";
1191  getPressureDiff(&SU).dump(*TRI);
1192  }
1193  dbgs() << " Single Issue : ";
1194  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1195  SchedModel.mustEndGroup(SU.getInstr()))
1196  dbgs() << "true;";
1197  else
1198  dbgs() << "false;";
1199  dbgs() << '\n';
1200  }
1201  if (ExitSU.getInstr() != nullptr)
1202  dumpNodeAll(ExitSU);
1203 #endif
1204 }
1205 
1206 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1207 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1208 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1209 ///
1210 /// This is a skeletal driver, with all the functionality pushed into helpers,
1211 /// so that it can be easily extended by experimental schedulers. Generally,
1212 /// implementing MachineSchedStrategy should be sufficient to implement a new
1213 /// scheduling algorithm. However, if a scheduler further subclasses
1214 /// ScheduleDAGMILive then it will want to override this virtual method in order
1215 /// to update any specialized state.
1216 void ScheduleDAGMILive::schedule() {
1217  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1218  LLVM_DEBUG(SchedImpl->dumpPolicy());
1219  buildDAGWithRegPressure();
1220 
1221  postprocessDAG();
1222 
1223  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1224  findRootsAndBiasEdges(TopRoots, BotRoots);
1225 
1226  // Initialize the strategy before modifying the DAG.
1227  // This may initialize a DFSResult to be used for queue priority.
1228  SchedImpl->initialize(this);
1229 
1230  LLVM_DEBUG(dump());
1231  if (PrintDAGs) dump();
1232  if (ViewMISchedDAGs) viewGraph();
1233 
1234  // Initialize ready queues now that the DAG and priority data are finalized.
1235  initQueues(TopRoots, BotRoots);
1236 
1237  bool IsTopNode = false;
1238  while (true) {
1239  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1240  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1241  if (!SU) break;
1242 
1243  assert(!SU->isScheduled && "Node already scheduled");
1244  if (!checkSchedLimit())
1245  break;
1246 
1247  scheduleMI(SU, IsTopNode);
1248 
1249  if (DFSResult) {
1250  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1251  if (!ScheduledTrees.test(SubtreeID)) {
1252  ScheduledTrees.set(SubtreeID);
1253  DFSResult->scheduleTree(SubtreeID);
1254  SchedImpl->scheduleTree(SubtreeID);
1255  }
1256  }
1257 
1258  // Notify the scheduling strategy after updating the DAG.
1259  SchedImpl->schedNode(SU, IsTopNode);
1260 
1261  updateQueues(SU, IsTopNode);
1262  }
1263  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1264 
1265  placeDebugValues();
1266 
1267  LLVM_DEBUG({
1268  dbgs() << "*** Final schedule for "
1269  << printMBBReference(*begin()->getParent()) << " ***\n";
1270  dumpSchedule();
1271  dbgs() << '\n';
1272  });
1273 }
1274 
1275 /// Build the DAG and setup three register pressure trackers.
1276 void ScheduleDAGMILive::buildDAGWithRegPressure() {
1277  if (!ShouldTrackPressure) {
1278  RPTracker.reset();
1279  RegionCriticalPSets.clear();
1280  buildSchedGraph(AA);
1281  return;
1282  }
1283 
1284  // Initialize the register pressure tracker used by buildSchedGraph.
1285  RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
1286  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1287 
1288  // Account for liveness generated by the region boundary.
1289  if (LiveRegionEnd != RegionEnd)
1290  RPTracker.recede();
1291 
1292  // Build the DAG, and compute current register pressure.
1293  buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
1294 
1295  // Initialize top/bottom trackers after computing region pressure.
1296  initRegPressure();
1297 }
1298 
1299 void ScheduleDAGMILive::computeDFSResult() {
1300  if (!DFSResult)
1301  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1302  DFSResult->clear();
1303  ScheduledTrees.clear();
1304  DFSResult->resize(SUnits.size());
1305  DFSResult->compute(SUnits);
1306  ScheduledTrees.resize(DFSResult->getNumSubtrees());
1307 }
1308 
1309 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1310 /// only provides the critical path for single block loops. To handle loops that
1311 /// span blocks, we could use the vreg path latencies provided by
1312 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1313 /// available for use in the scheduler.
1314 ///
1315 /// The cyclic path estimation identifies a def-use pair that crosses the back
1316 /// edge and considers the depth and height of the nodes. For example, consider
1317 /// the following instruction sequence where each instruction has unit latency
1318 /// and defines an eponymous virtual register:
1319 ///
1320 /// a->b(a,c)->c(b)->d(c)->exit
1321 ///
1322 /// The cyclic critical path is a two cycles: b->c->b
1323 /// The acyclic critical path is four cycles: a->b->c->d->exit
1324 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1325 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1326 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1327 /// LiveInDepth = depth(b) = len(a->b) = 1
1328 ///
1329 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1330 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1331 /// CyclicCriticalPath = min(2, 2) = 2
1332 ///
1333 /// This could be relevant to PostRA scheduling, but is currently implemented
1334 /// assuming LiveIntervals.
1335 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
1336  // This only applies to single block loops.
1337  if (!BB->isSuccessor(BB))
1338  return 0;
1339 
1340  unsigned MaxCyclicLatency = 0;
1341  // Visit each live out vreg def to find def/use pairs that cross iterations.
1342  for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
1343  Register Reg = P.RegUnit;
1344  if (!Register::isVirtualRegister(Reg))
1345  continue;
1346  const LiveInterval &LI = LIS->getInterval(Reg);
1347  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1348  if (!DefVNI)
1349  continue;
1350 
1351  MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
1352  const SUnit *DefSU = getSUnit(DefMI);
1353  if (!DefSU)
1354  continue;
1355 
1356  unsigned LiveOutHeight = DefSU->getHeight();
1357  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1358  // Visit all local users of the vreg def.
1359  for (const VReg2SUnit &V2SU
1360  : make_range(VRegUses.find(Reg), VRegUses.end())) {
1361  SUnit *SU = V2SU.SU;
1362  if (SU == &ExitSU)
1363  continue;
1364 
1365  // Only consider uses of the phi.
1366  LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1367  if (!LRQ.valueIn()->isPHIDef())
1368  continue;
1369 
1370  // Assume that a path spanning two iterations is a cycle, which could
1371  // overestimate in strange cases. This allows cyclic latency to be
1372  // estimated as the minimum slack of the vreg's depth or height.
1373  unsigned CyclicLatency = 0;
1374  if (LiveOutDepth > SU->getDepth())
1375  CyclicLatency = LiveOutDepth - SU->getDepth();
1376 
1377  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1378  if (LiveInHeight > LiveOutHeight) {
1379  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1380  CyclicLatency = LiveInHeight - LiveOutHeight;
1381  } else
1382  CyclicLatency = 0;
1383 
1384  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1385  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1386  if (CyclicLatency > MaxCyclicLatency)
1387  MaxCyclicLatency = CyclicLatency;
1388  }
1389  }
1390  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1391  return MaxCyclicLatency;
1392 }
1393 
1394 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1395 /// the Top RP tracker in case the region beginning has changed.
1396 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
1397  ArrayRef<SUnit*> BotRoots) {
1398  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1399  if (ShouldTrackPressure) {
1400  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1401  TopRPTracker.setPos(CurrentTop);
1402  }
1403 }
1404 
1405 /// Move an instruction and update register pressure.
1406 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1407  // Move the instruction to its new location in the instruction stream.
1408  MachineInstr *MI = SU->getInstr();
1409 
1410  if (IsTopNode) {
1411  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1412  if (&*CurrentTop == MI)
1413  CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
1414  else {
1415  moveInstruction(MI, CurrentTop);
1416  TopRPTracker.setPos(MI);
1417  }
1418 
1419  if (ShouldTrackPressure) {
1420  // Update top scheduled pressure.
1421  RegisterOperands RegOpers;
1422  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1423  if (ShouldTrackLaneMasks) {
1424  // Adjust liveness and add missing dead+read-undef flags.
1425  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1426  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1427  } else {
1428  // Adjust for missing dead-def flags.
1429  RegOpers.detectDeadDefs(*MI, *LIS);
1430  }
1431 
1432  TopRPTracker.advance(RegOpers);
1433  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1434  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1435  TopRPTracker.getRegSetPressureAtPos(), TRI););
1436 
1437  updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
1438  }
1439  } else {
1440  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1441  MachineBasicBlock::iterator priorII =
1442  priorNonDebug(CurrentBottom, CurrentTop);
1443  if (&*priorII == MI)
1444  CurrentBottom = priorII;
1445  else {
1446  if (&*CurrentTop == MI) {
1447  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1448  TopRPTracker.setPos(CurrentTop);
1449  }
1450  moveInstruction(MI, CurrentBottom);
1451  CurrentBottom = MI;
1452  BotRPTracker.setPos(CurrentBottom);
1453  }
1454  if (ShouldTrackPressure) {
1455  RegisterOperands RegOpers;
1456  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1457  if (ShouldTrackLaneMasks) {
1458  // Adjust liveness and add missing dead+read-undef flags.
1459  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1460  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1461  } else {
1462  // Adjust for missing dead-def flags.
1463  RegOpers.detectDeadDefs(*MI, *LIS);
1464  }
1465 
1466  if (BotRPTracker.getPos() != CurrentBottom)
1467  BotRPTracker.recedeSkipDebugValues();
1468  SmallVector<RegisterMaskPair, 8> LiveUses;
1469  BotRPTracker.recede(RegOpers, &LiveUses);
1470  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1471  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1472  BotRPTracker.getRegSetPressureAtPos(), TRI););
1473 
1474  updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
1475  updatePressureDiffs(LiveUses);
1476  }
1477  }
1478 }
1479 
1480 //===----------------------------------------------------------------------===//
1481 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1482 //===----------------------------------------------------------------------===//
1483 
1484 namespace {
1485 
1486 /// Post-process the DAG to create cluster edges between neighboring
1487 /// loads or between neighboring stores.
1488 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1489  struct MemOpInfo {
1490  SUnit *SU;
1491  SmallVector<const MachineOperand *, 4> BaseOps;
1492  int64_t Offset;
1493  unsigned Width;
1494 
1495  MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1496  int64_t Offset, unsigned Width)
1497  : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1498  Width(Width) {}
1499 
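/// Order base operands by kind, then by register number or frame index
/// (respecting the stack growth direction), giving clustering a stable,
/// locality-friendly sort key.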
1500  static bool Compare(const MachineOperand *const &A,
1501  const MachineOperand *const &B) {
1502  if (A->getType() != B->getType())
1503  return A->getType() < B->getType();
1504  if (A->isReg())
1505  return A->getReg() < B->getReg();
1506  if (A->isFI()) {
1507  const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1508  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1509  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1510  TargetFrameLowering::StackGrowsDown;
1511  return StackGrowsDown ? A->getIndex() > B->getIndex()
1512  : A->getIndex() < B->getIndex();
1513  }
1514 
1515  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1516  "index bases.");
1517  }
1518 
1519  bool operator<(const MemOpInfo &RHS) const {
1520  // FIXME: Don't compare everything twice. Maybe use C++20 three way
1521  // comparison instead when it's available.
1522  if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1523  RHS.BaseOps.begin(), RHS.BaseOps.end(),
1524  Compare))
1525  return true;
1526  if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1527  BaseOps.begin(), BaseOps.end(), Compare))
1528  return false;
1529  if (Offset != RHS.Offset)
1530  return Offset < RHS.Offset;
1531  return SU->NodeNum < RHS.SU->NodeNum;
1532  }
1533  };
1534 
1535  const TargetInstrInfo *TII;
1536  const TargetRegisterInfo *TRI;
1537  bool IsLoad;
1538 
1539 public:
1540  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1541  const TargetRegisterInfo *tri, bool IsLoad)
1542  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1543 
1544  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1545 
1546 protected:
1547  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1548  ScheduleDAGInstrs *DAG);
1549  void collectMemOpRecords(std::vector<SUnit> &SUnits,
1550  SmallVectorImpl<MemOpInfo> &MemOpRecords);
1551  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1552  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1553 };
1554 
1555 class StoreClusterMutation : public BaseMemOpClusterMutation {
1556 public:
1557  StoreClusterMutation(const TargetInstrInfo *tii,
1558  const TargetRegisterInfo *tri)
1559  : BaseMemOpClusterMutation(tii, tri, false) {}
1560 };
1561 
1562 class LoadClusterMutation : public BaseMemOpClusterMutation {
1563 public:
1564  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1565  : BaseMemOpClusterMutation(tii, tri, true) {}
1566 };
1567 
1568 } // end anonymous namespace
1569 
1570 namespace llvm {
1571 
1572 std::unique_ptr<ScheduleDAGMutation>
1573 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
1574  const TargetRegisterInfo *TRI) {
1575  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1576  : nullptr;
1577 }
1578 
1579 std::unique_ptr<ScheduleDAGMutation>
1580 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
1581  const TargetRegisterInfo *TRI) {
1582  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1583  : nullptr;
1584 }
1585 
1586 } // end namespace llvm
1587 
1588 // Sort all the loads/stores first. Then, for each load/store, check the
1589 // following loads/stores one by one until we reach the first non-dependent one,
1590 // and call the target hook to see if they can be clustered.
1591 // If FastCluster is enabled, we assume that all the loads/stores have been
1592 // preprocessed so that they have no dependencies on each other.
1593 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1594  ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1595  ScheduleDAGInstrs *DAG) {
1596  // Keep track of the current cluster length and bytes for each SUnit.
1597  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1598 
1599  // At this point, `MemOpRecords` array must hold at least two mem ops. Try to
1600  // cluster mem ops collected within `MemOpRecords` array.
1601  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1602  // Decision to cluster mem ops is taken based on target dependent logic
1603  auto MemOpa = MemOpRecords[Idx];
1604 
1605  // Look for the next load/store that the current one can be clustered with.
1606  unsigned NextIdx = Idx + 1;
1607  for (; NextIdx < End; ++NextIdx)
1608  // Skip if MemOpb has been clustered already or has dependency with
1609  // MemOpa.
1610  if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1611  (FastCluster ||
1612  (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1613  !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1614  break;
1615  if (NextIdx == End)
1616  continue;
1617 
1618  auto MemOpb = MemOpRecords[NextIdx];
1619  unsigned ClusterLength = 2;
1620  unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1621  if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1622  ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1623  CurrentClusterBytes =
1624  SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1625  }
1626 
1627  if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1628  CurrentClusterBytes))
1629  continue;
1630 
1631  SUnit *SUa = MemOpa.SU;
1632  SUnit *SUb = MemOpb.SU;
1633  if (SUa->NodeNum > SUb->NodeNum)
1634  std::swap(SUa, SUb);
1635 
1636  // FIXME: Is this check really required?
1637  if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1638  continue;
1639 
1640  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1641  << SUb->NodeNum << ")\n");
1642  ++NumClustered;
1643 
1644  if (IsLoad) {
1645  // Copy successor edges from SUa to SUb. Interleaving computation
1646  // dependent on SUa can prevent load combining due to register reuse.
1647  // Predecessor edges do not need to be copied from SUb to SUa since
1648  // nearby loads should have effectively the same inputs.
1649  for (const SDep &Succ : SUa->Succs) {
1650  if (Succ.getSUnit() == SUb)
1651  continue;
1652  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1653  << ")\n");
1654  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1655  }
1656  } else {
1657  // Copy predecessor edges from SUb to SUa so that the SUnits SUb depends
1658  // on cannot be scheduled in-between SUb and SUa. Successor edges
1659  // do not need to be copied from SUa to SUb since no one will depend
1660  // on stores.
1661  // Note that we don't need to worry about memory dependencies here:
1662  // we won't try to cluster two ops if they have any memory dependency.
1663  for (const SDep &Pred : SUb->Preds) {
1664  if (Pred.getSUnit() == SUa)
1665  continue;
1666  LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1667  << ")\n");
1668  DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1669  }
1670  }
1671 
1672  SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1673  CurrentClusterBytes};
1674 
1675  LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1676  << ", Curr cluster bytes: " << CurrentClusterBytes
1677  << "\n");
1678  }
1679 }
1680 
1681 void BaseMemOpClusterMutation::collectMemOpRecords(
1682  std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1683  for (auto &SU : SUnits) {
1684  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1685  (!IsLoad && !SU.getInstr()->mayStore()))
1686  continue;
1687 
1688  const MachineInstr &MI = *SU.getInstr();
1689  SmallVector<const MachineOperand *, 4> BaseOps;
1690  int64_t Offset;
1691  bool OffsetIsScalable;
1692  unsigned Width;
1693  if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1694  OffsetIsScalable, Width, TRI)) {
1695  MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1696 
1697  LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1698  << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1699  << ", Width: " << Width << "\n");
1700  }
1701 #ifndef NDEBUG
1702  for (auto *Op : BaseOps)
1703  assert(Op);
1704 #endif
1705  }
1706 }
1707 
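/// Partition the collected mem ops into groups that are candidates for
/// clustering. In fast-cluster mode, ops are keyed by their non-artificial
/// control predecessor, so members of a group are assumed to carry no
/// dependencies on each other; otherwise everything lands in a single group
/// and reachability is checked later in clusterNeighboringMemOps.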
1708 bool BaseMemOpClusterMutation::groupMemOps(
1709  ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1710  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1711  bool FastCluster =
1712  ForceFastCluster ||
1713  MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1714 
1715  for (const auto &MemOp : MemOps) {
1716  unsigned ChainPredID = DAG->SUnits.size();
1717  if (FastCluster) {
1718  for (const SDep &Pred : MemOp.SU->Preds) {
1719  // We only want to cluster the mem ops that have the same ctrl (non-data)
1720  // pred so that they have no ctrl dependency on each other. But for
1721  // store instrs, we can still cluster them if the pred is a load instr.
1722  if ((Pred.isCtrl() &&
1723  (IsLoad ||
1724  (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1725  !Pred.isArtificial()) {
1726  ChainPredID = Pred.getSUnit()->NodeNum;
1727  break;
1728  }
1729  }
1730  } else
1731  ChainPredID = 0;
1732 
1733  Groups[ChainPredID].push_back(MemOp);
1734  }
1735  return FastCluster;
1736 }
1737 
1738 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1739 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
1740  // Collect all the clusterable loads/stores.
1741  SmallVector<MemOpInfo, 32> MemOpRecords;
1742  collectMemOpRecords(DAG->SUnits, MemOpRecords);
1743 
1744  if (MemOpRecords.size() < 2)
1745  return;
1746 
1747  // Put the loads/stores without dependencies into the same group using a
1748  // heuristic if the DAG is too complex, to avoid compile-time blow up.
1749  // Note that some fusion pairs may be lost this way.
1750  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> Groups;
1751  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1752 
1753  for (auto &Group : Groups) {
1754  // Sort the loads/stores so that we can stop clustering as early as
1755  // possible.
1756  llvm::sort(Group.second);
1757 
1758  // Trying to cluster all the neighboring loads/stores.
1759  clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1760  }
1761 }
1762 
1763 //===----------------------------------------------------------------------===//
1764 // CopyConstrain - DAG post-processing to encourage copy elimination.
1765 //===----------------------------------------------------------------------===//
1766 
1767 namespace {
1768 
1769 /// Post-process the DAG to create weak edges from all uses of a copy to
1770 /// the one use that defines the copy's source vreg, most likely an induction
1771 /// variable increment.
1772 class CopyConstrain : public ScheduleDAGMutation {
1773  // Transient state.
1774  SlotIndex RegionBeginIdx;
1775 
1776  // RegionEndIdx is the slot index of the last non-debug instruction in the
1777  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1778  SlotIndex RegionEndIdx;
1779 
1780 public:
1781  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1782 
1783  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1784 
1785 protected:
1786  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1787 };
1788 
1789 } // end anonymous namespace
1790 
1791 namespace llvm {
1792 
1793 std::unique_ptr<ScheduleDAGMutation>
1794 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1795  const TargetRegisterInfo *TRI) {
1796  return std::make_unique<CopyConstrain>(TII, TRI);
1797 }
1798 
1799 } // end namespace llvm
1800 
1801 /// constrainLocalCopy handles two possibilities:
1802 /// 1) Local src:
1803 /// I0: = dst
1804 /// I1: src = ...
1805 /// I2: = dst
1806 /// I3: dst = src (copy)
1807 /// (create pred->succ edges I0->I1, I2->I1)
1808 ///
1809 /// 2) Local copy:
1810 /// I0: dst = src (copy)
1811 /// I1: = dst
1812 /// I2: src = ...
1813 /// I3: = dst
1814 /// (create pred->succ edges I1->I2, I3->I2)
1815 ///
1816 /// Although the MachineScheduler is currently constrained to single blocks,
1817 /// this algorithm should handle extended blocks. An EBB is a set of
1818 /// contiguously numbered blocks such that the previous block in the EBB is
1819 /// always the single predecessor.
1820 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1821  LiveIntervals *LIS = DAG->getLIS();
1822  MachineInstr *Copy = CopySU->getInstr();
1823 
1824  // Check for pure vreg copies.
1825  const MachineOperand &SrcOp = Copy->getOperand(1);
1826  Register SrcReg = SrcOp.getReg();
1827  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1828  return;
1829 
1830  const MachineOperand &DstOp = Copy->getOperand(0);
1831  Register DstReg = DstOp.getReg();
1832  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1833  return;
1834 
1835  // Check if either the dest or source is local. If it's live across a back
1836  // edge, it's not local. Note that if both vregs are live across the back
1837  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1838  // If both the copy's source and dest are local live intervals, then we
1839  // should treat the dest as the global for the purpose of adding
1840  // constraints. This adds edges from source's other uses to the copy.
1841  unsigned LocalReg = SrcReg;
1842  unsigned GlobalReg = DstReg;
1843  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1844  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1845  LocalReg = DstReg;
1846  GlobalReg = SrcReg;
1847  LocalLI = &LIS->getInterval(LocalReg);
1848  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1849  return;
1850  }
1851  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1852 
1853  // Find the global segment after the start of the local LI.
1854  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1855  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1856  // local live range. We could create edges from other global uses to the local
1857  // start, but the coalescer should have already eliminated these cases, so
1858  // don't bother dealing with it.
1859  if (GlobalSegment == GlobalLI->end())
1860  return;
1861 
1862  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1863  // returned the next global segment. But if GlobalSegment overlaps with
1864  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1865  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1866  if (GlobalSegment->contains(LocalLI->beginIndex()))
1867  ++GlobalSegment;
1868 
1869  if (GlobalSegment == GlobalLI->end())
1870  return;
1871 
1872  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1873  if (GlobalSegment != GlobalLI->begin()) {
1874  // Two address defs have no hole.
1875  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1876  GlobalSegment->start)) {
1877  return;
1878  }
1879  // If the prior global segment may be defined by the same two-address
1880  // instruction that also defines LocalLI, then we can't make a hole here.
1881  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1882  LocalLI->beginIndex())) {
1883  return;
1884  }
1885  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1886  // it would be a disconnected component in the live range.
1887  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1888  "Disconnected LRG within the scheduling region.");
1889  }
1890  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1891  if (!GlobalDef)
1892  return;
1893 
1894  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1895  if (!GlobalSU)
1896  return;
1897 
1898  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1899  // constraining the uses of the last local def to precede GlobalDef.
1900  SmallVector<SUnit*,8> LocalUses;
1901  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1902  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1903  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1904  for (const SDep &Succ : LastLocalSU->Succs) {
1905  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1906  continue;
1907  if (Succ.getSUnit() == GlobalSU)
1908  continue;
1909  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1910  return;
1911  LocalUses.push_back(Succ.getSUnit());
1912  }
1913  // Open the top of the GlobalLI hole by constraining any earlier global uses
1914  // to precede the start of LocalLI.
1915  SmallVector<SUnit*,8> GlobalUses;
1916  MachineInstr *FirstLocalDef =
1917  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1918  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1919  for (const SDep &Pred : GlobalSU->Preds) {
1920  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1921  continue;
1922  if (Pred.getSUnit() == FirstLocalSU)
1923  continue;
1924  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1925  return;
1926  GlobalUses.push_back(Pred.getSUnit());
1927  }
1928  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1929  // Add the weak edges.
1930  for (SUnit *LU : LocalUses) {
1931  LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
1932  << GlobalSU->NodeNum << ")\n");
1933  DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
1934  }
1935  for (SUnit *GU : GlobalUses) {
1936  LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
1937  << FirstLocalSU->NodeNum << ")\n");
1938  DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
1939  }
1940 }
1941 
1942 /// Callback from DAG postProcessing to create weak edges to encourage
1943 /// copy elimination.
1944 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1945  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1946  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1947 
1948  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1949  if (FirstPos == DAG->end())
1950  return;
1951  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1952  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1953  *priorNonDebug(DAG->end(), DAG->begin()));
1954 
1955  for (SUnit &SU : DAG->SUnits) {
1956  if (!SU.getInstr()->isCopy())
1957  continue;
1958 
1959  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1960  }
1961 }
1962 
1963 //===----------------------------------------------------------------------===//
1964 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1965 // and possibly other custom schedulers.
1966 //===----------------------------------------------------------------------===//
1967 
1968 static const unsigned InvalidCycle = ~0U;
1969 
1970 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1971 
1972 /// Given a Count of resource usage and a Latency value, return true if a
1973 /// SchedBoundary becomes resource limited.
1974 /// If we are checking after scheduling a node, we should return true when
1975 /// we just reach the resource limit.
1976 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1977  unsigned Latency, bool AfterSchedNode) {
1978  int ResCntFactor = (int)(Count - (Latency * LFactor));
1979  if (AfterSchedNode)
1980  return ResCntFactor >= (int)LFactor;
1981  else
1982  return ResCntFactor > (int)LFactor;
1983 }
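// For example, with LFactor = 2, Count = 10 and Latency = 4:
// ResCntFactor = 10 - 4*2 = 2. When checking right after scheduling a node
// (AfterSchedNode == true), 2 >= 2 holds and the zone counts as resource
// limited; when checking beforehand it does not (2 > 2 is false).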
1984 
1985 void SchedBoundary::reset() {
1986  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1987  // Destroying and reconstructing it is very expensive though. So keep
1988  // invalid, placeholder HazardRecs.
1989  if (HazardRec && HazardRec->isEnabled()) {
1990  delete HazardRec;
1991  HazardRec = nullptr;
1992  }
1993  Available.clear();
1994  Pending.clear();
1995  CheckPending = false;
1996  CurrCycle = 0;
1997  CurrMOps = 0;
1998  MinReadyCycle = std::numeric_limits<unsigned>::max();
1999  ExpectedLatency = 0;
2000  DependentLatency = 0;
2001  RetiredMOps = 0;
2002  MaxExecutedResCount = 0;
2003  ZoneCritResIdx = 0;
2004  IsResourceLimited = false;
2005  ReservedCycles.clear();
2006  ReservedCyclesIndex.clear();
2007  ResourceGroupSubUnitMasks.clear();
2008 #ifndef NDEBUG
2009  // Track the maximum number of stall cycles that could arise either from the
2010  // latency of a DAG edge or the number of cycles that a processor resource is
2011  // reserved (SchedBoundary::ReservedCycles).
2012  MaxObservedStall = 0;
2013 #endif
2014  // Reserve a zero-count for invalid CritResIdx.
2015  ExecutedResCounts.resize(1);
2016  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2017 }
2018 
2019 void SchedRemainder::
2020 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2021  reset();
2022  if (!SchedModel->hasInstrSchedModel())
2023  return;
2024  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2025  for (SUnit &SU : DAG->SUnits) {
2026  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2027  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2028  * SchedModel->getMicroOpFactor();
2029  for (TargetSchedModel::ProcResIter
2030  PI = SchedModel->getWriteProcResBegin(SC),
2031  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2032  unsigned PIdx = PI->ProcResourceIdx;
2033  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2034  RemainingCounts[PIdx] += (Factor * PI->Cycles);
2035  }
2036  }
2037 }
2038 
2039 void SchedBoundary::
2040 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2041  reset();
2042  DAG = dag;
2043  SchedModel = smodel;
2044  Rem = rem;
2045  if (SchedModel->hasInstrSchedModel()) {
2046  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2047  ReservedCyclesIndex.resize(ResourceCount);
2048  ExecutedResCounts.resize(ResourceCount);
2049  ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2050  unsigned NumUnits = 0;
2051 
2052  for (unsigned i = 0; i < ResourceCount; ++i) {
2053  ReservedCyclesIndex[i] = NumUnits;
2054  NumUnits += SchedModel->getProcResource(i)->NumUnits;
2055  if (isUnbufferedGroup(i)) {
2056  auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2057  for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2058  U != UE; ++U)
2059  ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2060  }
2061  }
2062 
2063  ReservedCycles.resize(NumUnits, InvalidCycle);
2064  }
2065 }
2066 
2067 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2068 /// these "soft stalls" differently than the hard stall cycles based on CPU
2069 /// resources and computed by checkHazard(). A fully in-order model
2070 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2071 /// available for scheduling until they are ready. However, a weaker in-order
2072 /// model may use this for heuristics. For example, if a processor has in-order
2073 /// behavior when reading certain resources, this may come into play.
2074 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2075  if (!SU->isUnbuffered)
2076  return 0;
2077 
2078  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2079  if (ReadyCycle > CurrCycle)
2080  return ReadyCycle - CurrCycle;
2081  return 0;
2082 }
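// E.g. an unbuffered SU that becomes ready at cycle 7 while the zone is at
// CurrCycle 5 reports 2 soft-stall cycles; buffered SUs always report 0.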
2083 
2084 /// Compute the next cycle at which the given processor resource unit
2085 /// can be scheduled.
2086 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2087  unsigned Cycles) {
2088  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2089  // If this resource has never been used, always return cycle zero.
2090  if (NextUnreserved == InvalidCycle)
2091  return 0;
2092  // For bottom-up scheduling add the cycles needed for the current operation.
2093  if (!isTop())
2094  NextUnreserved += Cycles;
2095  return NextUnreserved;
2096 }
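// E.g. if ReservedCycles[InstanceIdx] is 6, a top-down query returns 6, while
// a bottom-up query with Cycles = 2 returns 8; a never-used unit
// (InvalidCycle) is reported available at cycle 0.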
2097 
2098 /// Compute the next cycle at which the given processor resource can be
2099 /// scheduled. Returns the next cycle and the index of the processor resource
2100 /// instance in the reserved cycles vector.
2101 std::pair<unsigned, unsigned>
2102 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2103  unsigned Cycles) {
2104 
2105  unsigned MinNextUnreserved = InvalidCycle;
2106  unsigned InstanceIdx = 0;
2107  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2108  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2109  assert(NumberOfInstances > 0 &&
2110  "Cannot have zero instances of a ProcResource");
2111 
2112  if (isUnbufferedGroup(PIdx)) {
2113  // If any subunits are used by the instruction, report that the resource
2114  // group is available at 0, effectively removing the group record from
2115  // hazarding and basing the hazarding decisions on the subunit records.
2116  // Otherwise, choose the first available instance from among the subunits.
2117  // Specifications which assign cycles to both the subunits and the group or
2118  // which use an unbuffered group with buffered subunits will appear to
2119  // schedule strangely. In the first case, the additional cycles for the
2120  // group will be ignored. In the second, the group will be ignored
2121  // entirely.
2122  for (const MCWriteProcResEntry &PE :
2123  make_range(SchedModel->getWriteProcResBegin(SC),
2124  SchedModel->getWriteProcResEnd(SC)))
2125  if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2126  return std::make_pair(0u, StartIndex);
2127 
2128  auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2129  for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2130  unsigned NextUnreserved, NextInstanceIdx;
2131  std::tie(NextUnreserved, NextInstanceIdx) =
2132  getNextResourceCycle(SC, SubUnits[I], Cycles);
2133  if (MinNextUnreserved > NextUnreserved) {
2134  InstanceIdx = NextInstanceIdx;
2135  MinNextUnreserved = NextUnreserved;
2136  }
2137  }
2138  return std::make_pair(MinNextUnreserved, InstanceIdx);
2139  }
2140 
2141  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2142  ++I) {
2143  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2144  if (MinNextUnreserved > NextUnreserved) {
2145  InstanceIdx = I;
2146  MinNextUnreserved = NextUnreserved;
2147  }
2148  }
2149  return std::make_pair(MinNextUnreserved, InstanceIdx);
2150 }
2151 
2152 /// Does this SU have a hazard within the current instruction group.
2153 ///
2154 /// The scheduler supports two modes of hazard recognition. The first is the
2155 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2156 /// supports highly complicated in-order reservation tables
2157 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2158 ///
2159 /// The second is a streamlined mechanism that checks for hazards based on
2160 /// simple counters that the scheduler itself maintains. It explicitly checks
2161 /// for instruction dispatch limitations, including the number of micro-ops that
2162 /// can dispatch per cycle.
2163 ///
2164 /// TODO: Also check whether the SU must start a new group.
2165 bool SchedBoundary::checkHazard(SUnit *SU) {
2166  if (HazardRec->isEnabled()
2167  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2168  return true;
2169  }
2170 
2171  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2172  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2173  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2174  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2175  return true;
2176  }
2177 
2178  if (CurrMOps > 0 &&
2179  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2180  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2181  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2182  << (isTop() ? "begin" : "end") << " group\n");
2183  return true;
2184  }
2185 
2187  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2188  for (const MCWriteProcResEntry &PE :
2189  make_range(SchedModel->getWriteProcResBegin(SC),
2190  SchedModel->getWriteProcResEnd(SC))) {
2191  unsigned ResIdx = PE.ProcResourceIdx;
2192  unsigned Cycles = PE.Cycles;
2193  unsigned NRCycle, InstanceIdx;
2194  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2195  if (NRCycle > CurrCycle) {
2196 #ifndef NDEBUG
2197  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2198 #endif
2199  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2200  << SchedModel->getResourceName(ResIdx)
2201  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2202  << "=" << NRCycle << "c\n");
2203  return true;
2204  }
2205  }
2206  }
2207  return false;
2208 }
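// E.g. with an issue width of 4 micro-ops per cycle, if CurrMOps is already 3
// and the candidate SU expands to 2 micro-ops, 3 + 2 > 4, so the SU is
// reported as a hazard and must wait for the next cycle.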
2209 
2210 // Find the unscheduled node in ReadySUs with the highest latency.
2211 unsigned SchedBoundary::
2212 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2213  SUnit *LateSU = nullptr;
2214  unsigned RemLatency = 0;
2215  for (SUnit *SU : ReadySUs) {
2216  unsigned L = getUnscheduledLatency(SU);
2217  if (L > RemLatency) {
2218  RemLatency = L;
2219  LateSU = SU;
2220  }
2221  }
2222  if (LateSU) {
2223  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2224  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2225  }
2226  return RemLatency;
2227 }
2228 
2229 // Count resources in this zone and the remaining unscheduled
2230 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2231 // resource index, or zero if the zone is issue limited.
2232 unsigned SchedBoundary::
2233 getOtherResourceCount(unsigned &OtherCritIdx) {
2234  OtherCritIdx = 0;
2235  if (!SchedModel->hasInstrSchedModel())
2236  return 0;
2237 
2238  unsigned OtherCritCount = Rem->RemIssueCount
2239  + (RetiredMOps * SchedModel->getMicroOpFactor());
2240  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2241  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2242  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2243  PIdx != PEnd; ++PIdx) {
2244  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2245  if (OtherCount > OtherCritCount) {
2246  OtherCritCount = OtherCount;
2247  OtherCritIdx = PIdx;
2248  }
2249  }
2250  if (OtherCritIdx) {
2251  LLVM_DEBUG(
2252  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2253  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2254  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2255  }
2256  return OtherCritCount;
2257 }
2258 
2259 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2260  unsigned Idx) {
2261  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2262 
2263 #ifndef NDEBUG
2264  // ReadyCycle has been bumped up to the CurrCycle when this node was
2265  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2266  // scheduling, so may now be greater than ReadyCycle.
2267  if (ReadyCycle > CurrCycle)
2268  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2269 #endif
2270 
2271  if (ReadyCycle < MinReadyCycle)
2272  MinReadyCycle = ReadyCycle;
2273 
2274  // Check for interlocks first. For the purpose of other heuristics, an
2275  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2276  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2277  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2278  checkHazard(SU) || (Available.size() >= ReadyListLimit);
2279 
2280  if (!HazardDetected) {
2281  Available.push(SU);
2282 
2283  if (InPQueue)
2284  Pending.remove(Pending.begin() + Idx);
2285  return;
2286  }
2287 
2288  if (!InPQueue)
2289  Pending.push(SU);
2290 }
2291 
2292 /// Move the boundary of scheduled code by one cycle.
2293 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2294  if (SchedModel->getMicroOpBufferSize() == 0) {
2295  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2296  "MinReadyCycle uninitialized");
2297  if (MinReadyCycle > NextCycle)
2298  NextCycle = MinReadyCycle;
2299  }
2300  // Update the current micro-ops, which will issue in the next cycle.
2301  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2302  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2303 
2304  // Decrement DependentLatency based on the next cycle.
2305  if ((NextCycle - CurrCycle) > DependentLatency)
2306  DependentLatency = 0;
2307  else
2308  DependentLatency -= (NextCycle - CurrCycle);
2309 
2310  if (!HazardRec->isEnabled()) {
2311  // Bypass HazardRec virtual calls.
2312  CurrCycle = NextCycle;
2313  } else {
2314  // Bypass getHazardType calls in case of long latency.
2315  for (; CurrCycle != NextCycle; ++CurrCycle) {
2316  if (isTop())
2317  HazardRec->AdvanceCycle();
2318  else
2319  HazardRec->RecedeCycle();
2320  }
2321  }
2322  CheckPending = true;
2323  IsResourceLimited =
2324  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2325  getScheduledLatency(), true);
2326 
2327  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2328  << '\n');
2329 }
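// E.g. advancing from CurrCycle 10 to NextCycle 12 with an issue width of 4
// clears DecMOps = 4 * 2 = 8 micro-ops from CurrMOps (saturating at 0) and
// reduces DependentLatency by 2 cycles (or down to 0).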
2330 
2331 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2332  ExecutedResCounts[PIdx] += Count;
2333  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2334  MaxExecutedResCount = ExecutedResCounts[PIdx];
2335 }
2336 
2337 /// Add the given processor resource to this scheduled zone.
2338 ///
2339 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2340 /// during which this resource is consumed.
2341 ///
2342 /// \return the next cycle at which the instruction may execute without
2343 /// oversubscribing resources.
2344 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2345  unsigned Cycles, unsigned NextCycle) {
2346  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2347  unsigned Count = Factor * Cycles;
2348  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2349  << Cycles << "x" << Factor << "u\n");
2350 
2351  // Update Executed resources counts.
2352  incExecutedResources(PIdx, Count);
2353  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2354  Rem->RemainingCounts[PIdx] -= Count;
2355 
2356  // Check if this resource exceeds the current critical resource. If so, it
2357  // becomes the critical resource.
2358  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2359  ZoneCritResIdx = PIdx;
2360  LLVM_DEBUG(dbgs() << " *** Critical resource "
2361  << SchedModel->getResourceName(PIdx) << ": "
2362  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2363  << "c\n");
2364  }
2365  // For reserved resources, record the highest cycle using the resource.
2366  unsigned NextAvailable, InstanceIdx;
2367  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2368  if (NextAvailable > CurrCycle) {
2369  LLVM_DEBUG(dbgs() << " Resource conflict: "
2370  << SchedModel->getResourceName(PIdx)
2371  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2372  << " reserved until @" << NextAvailable << "\n");
2373  }
2374  return NextAvailable;
2375 }
2376 
2377 /// Move the boundary of scheduled code by one SUnit.
2378 void SchedBoundary::bumpNode(SUnit *SU) {
2379  // Update the reservation table.
2380  if (HazardRec->isEnabled()) {
2381  if (!isTop() && SU->isCall) {
2382  // Calls are scheduled with their preceding instructions. For bottom-up
2383  // scheduling, clear the pipeline state before emitting.
2384  HazardRec->Reset();
2385  }
2387  // Scheduling an instruction may have made pending instructions available.
2388  CheckPending = true;
2389  }
2390  // checkHazard should prevent scheduling multiple instructions per cycle that
2391  // exceed the issue width.
2392  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2393  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2394  assert(
2395  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2396  "Cannot schedule this instruction's MicroOps in the current cycle.");
2397 
2398  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2399  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2400 
2401  unsigned NextCycle = CurrCycle;
2402  switch (SchedModel->getMicroOpBufferSize()) {
2403  case 0:
2404  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2405  break;
2406  case 1:
2407  if (ReadyCycle > NextCycle) {
2408  NextCycle = ReadyCycle;
2409  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2410  }
2411  break;
2412  default:
2413  // We don't currently model the OOO reorder buffer, so consider all
2414  // scheduled MOps to be "retired". We do loosely model in-order resource
2415  // latency. If this instruction uses an in-order resource, account for any
2416  // likely stall cycles.
2417  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2418  NextCycle = ReadyCycle;
2419  break;
2420  }
2421  RetiredMOps += IncMOps;
2422 
2423  // Update resource counts and critical resource.
2424  if (SchedModel->hasInstrSchedModel()) {
2425  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2426  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2427  Rem->RemIssueCount -= DecRemIssue;
2428  if (ZoneCritResIdx) {
2429  // Scale scheduled micro-ops for comparing with the critical resource.
2430  unsigned ScaledMOps =
2431  RetiredMOps * SchedModel->getMicroOpFactor();
2432 
2433  // If scaled micro-ops are now more than the previous critical resource by
2434  // a full cycle, then micro-ops issue becomes critical.
2435  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2436  >= (int)SchedModel->getLatencyFactor()) {
2437  ZoneCritResIdx = 0;
2438  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2439  << ScaledMOps / SchedModel->getLatencyFactor()
2440  << "c\n");
2441  }
2442  }
2443  for (TargetSchedModel::ProcResIter
2444  PI = SchedModel->getWriteProcResBegin(SC),
2445  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2446  unsigned RCycle =
2447  countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2448  if (RCycle > NextCycle)
2449  NextCycle = RCycle;
2450  }
2451  if (SU->hasReservedResource) {
2452  // For reserved resources, record the highest cycle using the resource.
2453  // For top-down scheduling, this is the cycle in which we schedule this
2454  // instruction plus the number of cycles the operation reserves the
2455  // resource. For bottom-up, it is simply the instruction's cycle.
2456  for (TargetSchedModel::ProcResIter
2457  PI = SchedModel->getWriteProcResBegin(SC),
2458  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2459  unsigned PIdx = PI->ProcResourceIdx;
2460  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2461  unsigned ReservedUntil, InstanceIdx;
2462  std::tie(ReservedUntil, InstanceIdx) =
2463  getNextResourceCycle(SC, PIdx, 0);
2464  if (isTop()) {
2465  ReservedCycles[InstanceIdx] =
2466  std::max(ReservedUntil, NextCycle + PI->Cycles);
2467  } else
2468  ReservedCycles[InstanceIdx] = NextCycle;
2469  }
2470  }
2471  }
2472  }
2473  // Update ExpectedLatency and DependentLatency.
2474  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2475  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2476  if (SU->getDepth() > TopLatency) {
2477  TopLatency = SU->getDepth();
2478  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2479  << SU->NodeNum << ") " << TopLatency << "c\n");
2480  }
2481  if (SU->getHeight() > BotLatency) {
2482  BotLatency = SU->getHeight();
2483  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2484  << SU->NodeNum << ") " << BotLatency << "c\n");
2485  }
2486  // If we stall for any reason, bump the cycle.
2487  if (NextCycle > CurrCycle)
2488  bumpCycle(NextCycle);
2489  else
2490  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2491  // resource limited. If a stall occurred, bumpCycle does this.
2492  IsResourceLimited =
2493  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2494  getScheduledLatency(), true);
2495 
2496  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2497  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2498  // one cycle. Since we commonly reach the max MOps here, opportunistically
2499  // bump the cycle to avoid uselessly checking everything in the readyQ.
2500  CurrMOps += IncMOps;
2501 
2502  // Bump the cycle count for issue group constraints.
2503  // This must be done after NextCycle has been adjusted for all other stalls.
2504  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2505  // currCycle to X.
2506  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2507  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2508  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2509  << " group\n");
2510  bumpCycle(++NextCycle);
2511  }
2512 
2513  while (CurrMOps >= SchedModel->getIssueWidth()) {
2514  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2515  << CurrCycle << '\n');
2516  bumpCycle(++NextCycle);
2517  }
2518  LLVM_DEBUG(dumpScheduledState());
2519 }
2520 
2521 /// Release pending ready nodes into the available queue. This makes them
2522 /// visible to heuristics.
2523 void SchedBoundary::releasePending() {
2524  // If the available queue is empty, it is safe to reset MinReadyCycle.
2525  if (Available.empty())
2526  MinReadyCycle = std::numeric_limits<unsigned>::max();
2527 
2528  // Check to see if any of the pending instructions are ready to issue. If
2529  // so, add them to the available queue.
2530  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2531  SUnit *SU = *(Pending.begin() + I);
2532  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2533 
2534  if (ReadyCycle < MinReadyCycle)
2535  MinReadyCycle = ReadyCycle;
2536 
2537  if (Available.size() >= ReadyListLimit)
2538  break;
2539 
2540  releaseNode(SU, ReadyCycle, true, I);
2541  if (E != Pending.size()) {
2542  --I;
2543  --E;
2544  }
2545  }
2546  CheckPending = false;
2547 }
2548 
2549 /// Remove SU from the ready set for this boundary.
2550 void SchedBoundary::removeReady(SUnit *SU) {
2551  if (Available.isInQueue(SU))
2552  Available.remove(Available.find(SU));
2553  else {
2554  assert(Pending.isInQueue(SU) && "bad ready count");
2555  Pending.remove(Pending.find(SU));
2556  }
2557 }
2558 
2559 /// If this queue only has one ready candidate, return it. As a side effect,
2560 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2561 /// one node is ready. If multiple instructions are ready, return NULL.
2562 SUnit *SchedBoundary::pickOnlyChoice() {
2563  if (CheckPending)
2564  releasePending();
2565 
2566  // Defer any ready instrs that now have a hazard.
2567  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2568  if (checkHazard(*I)) {
2569  Pending.push(*I);
2570  I = Available.remove(I);
2571  continue;
2572  }
2573  ++I;
2574  }
2575  for (unsigned i = 0; Available.empty(); ++i) {
2576 // FIXME: Re-enable assert once PR20057 is resolved.
2577 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2578 // "permanent hazard");
2579  (void)i;
2580  bumpCycle(CurrCycle + 1);
2581  releasePending();
2582  }
2583 
2584  LLVM_DEBUG(Pending.dump());
2585  LLVM_DEBUG(Available.dump());
2586 
2587  if (Available.size() == 1)
2588  return *Available.begin();
2589  return nullptr;
2590 }
2591 
2592 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2593 // This is useful information to dump after bumpNode.
2594 // Note that the Queue contents are more useful before pickNodeFromQueue.
2595 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2596  unsigned ResFactor;
2597  unsigned ResCount;
2598  if (ZoneCritResIdx) {
2599  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2600  ResCount = getResourceCount(ZoneCritResIdx);
2601  } else {
2602  ResFactor = SchedModel->getMicroOpFactor();
2603  ResCount = RetiredMOps * ResFactor;
2604  }
2605  unsigned LFactor = SchedModel->getLatencyFactor();
2606  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2607  << " Retired: " << RetiredMOps;
2608  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2609  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2610  << ResCount / ResFactor << " "
2611  << SchedModel->getResourceName(ZoneCritResIdx)
2612  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2613  << (IsResourceLimited ? " - Resource" : " - Latency")
2614  << " limited.\n";
2615 }
2616 #endif
2617 
2618 //===----------------------------------------------------------------------===//
2619 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2620 //===----------------------------------------------------------------------===//
2621 
2622 void GenericSchedulerBase::SchedCandidate::
2623 initResourceDelta(const ScheduleDAGMI *DAG,
2624  const TargetSchedModel *SchedModel) {
2625  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2626  return;
2627 
2628  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2629  for (TargetSchedModel::ProcResIter
2630  PI = SchedModel->getWriteProcResBegin(SC),
2631  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2632  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2633  ResDelta.CritResources += PI->Cycles;
2634  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2635  ResDelta.DemandedResources += PI->Cycles;
2636  }
2637 }
2638 
2639 /// Compute remaining latency. We need this both to determine whether the
2640 /// overall schedule has become latency-limited and whether the instructions
2641 /// outside this zone are resource or latency limited.
2642 ///
2643 /// The "dependent" latency is updated incrementally during scheduling as the
2644 /// max height/depth of scheduled nodes minus the cycles since it was
2645 /// scheduled:
2646 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2647 ///
2648 /// The "independent" latency is the max ready queue depth:
2649 /// ILat = max N.depth for N in Available|Pending
2650 ///
2651 /// RemainingLatency is the greater of independent and dependent latency.
2652 ///
2653 /// These computations are expensive, especially in DAGs with many edges, so
2654 /// only do them if necessary.
2655 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2656  unsigned RemLatency = CurrZone.getDependentLatency();
2657  RemLatency = std::max(RemLatency,
2658  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2659  RemLatency = std::max(RemLatency,
2660  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2661  return RemLatency;
2662 }
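// E.g. if the zone's DependentLatency is 7, the deepest node in Available has
// an unscheduled latency of 9, and the maximum over Pending is 5, then
// RemainingLatency = max(7, 9, 5) = 9.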
2663 
2664 /// Returns true if the current cycle plus remaining latency is greater than
2665 /// the critical path in the scheduling region.
2666 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2667  SchedBoundary &CurrZone,
2668  bool ComputeRemLatency,
2669  unsigned &RemLatency) const {
2670  // The current cycle is already greater than the critical path, so we are
2671  // already latency limited and don't need to compute the remaining latency.
2672  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2673  return true;
2674 
2675  // If we haven't scheduled anything yet, then we aren't latency limited.
2676  if (CurrZone.getCurrCycle() == 0)
2677  return false;
2678 
2679  if (ComputeRemLatency)
2680  RemLatency = computeRemLatency(CurrZone);
2681 
2682  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2683 }
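// E.g. with Rem.CriticalPath = 20, a zone at cycle 8 with RemLatency 15 is
// latency limited (8 + 15 > 20), whereas the same zone with RemLatency 10 is
// not (8 + 10 <= 20).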
2684 
2685 /// Set the CandPolicy for a scheduling zone given the current resources and
2686 /// latencies inside and outside the zone.
2687 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2688  SchedBoundary &CurrZone,
2689  SchedBoundary *OtherZone) {
2690  // Apply preemptive heuristics based on the total latency and resources
2691  // inside and outside this zone. Potential stalls should be considered before
2692  // following this policy.
2693 
2694  // Compute the critical resource outside the zone.
2695  unsigned OtherCritIdx = 0;
2696  unsigned OtherCount =
2697  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2698 
2699  bool OtherResLimited = false;
2700  unsigned RemLatency = 0;
2701  bool RemLatencyComputed = false;
2702  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2703  RemLatency = computeRemLatency(CurrZone);
2704  RemLatencyComputed = true;
2705  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2706  OtherCount, RemLatency, false);
2707  }
2708 
2709  // Schedule aggressively for latency in PostRA mode. We don't check for
2710  // acyclic latency during PostRA, and highly out-of-order processors will
2711  // skip PostRA scheduling.
2712  if (!OtherResLimited &&
2713  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2714  RemLatency))) {
2715  Policy.ReduceLatency |= true;
2716  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2717  << " RemainingLatency " << RemLatency << " + "
2718  << CurrZone.getCurrCycle() << "c > CritPath "
2719  << Rem.CriticalPath << "\n");
2720  }
2721  // If the same resource is limiting inside and outside the zone, do nothing.
2722  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2723  return;
2724 
2725  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2726  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2727  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2728  } if (OtherResLimited) dbgs()
2729  << " RemainingLimit: "
2730  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2731  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2732  << " Latency limited both directions.\n");
2733 
2734  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2735  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2736 
2737  if (OtherResLimited)
2738  Policy.DemandResIdx = OtherCritIdx;
2739 }
2740 
2741 #ifndef NDEBUG
2742 const char *GenericSchedulerBase::getReasonStr(
2743  GenericSchedulerBase::CandReason Reason) {
2744  switch (Reason) {
2745  case NoCand: return "NOCAND ";
2746  case Only1: return "ONLY1 ";
2747  case PhysReg: return "PHYS-REG ";
2748  case RegExcess: return "REG-EXCESS";
2749  case RegCritical: return "REG-CRIT ";
2750  case Stall: return "STALL ";
2751  case Cluster: return "CLUSTER ";
2752  case Weak: return "WEAK ";
2753  case RegMax: return "REG-MAX ";
2754  case ResourceReduce: return "RES-REDUCE";
2755  case ResourceDemand: return "RES-DEMAND";
2756  case TopDepthReduce: return "TOP-DEPTH ";
2757  case TopPathReduce: return "TOP-PATH ";
2758  case BotHeightReduce:return "BOT-HEIGHT";
2759  case BotPathReduce: return "BOT-PATH ";
2760  case NextDefUse: return "DEF-USE ";
2761  case NodeOrder: return "ORDER ";
2762  };
2763  llvm_unreachable("Unknown reason!");
2764 }
2765 
2766 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2767  PressureChange P;
2768  unsigned ResIdx = 0;
2769  unsigned Latency = 0;
2770  switch (Cand.Reason) {
2771  default:
2772  break;
2773  case RegExcess:
2774  P = Cand.RPDelta.Excess;
2775  break;
2776  case RegCritical:
2777  P = Cand.RPDelta.CriticalMax;
2778  break;
2779  case RegMax:
2780  P = Cand.RPDelta.CurrentMax;
2781  break;
2782  case ResourceReduce:
2783  ResIdx = Cand.Policy.ReduceResIdx;
2784  break;
2785  case ResourceDemand:
2786  ResIdx = Cand.Policy.DemandResIdx;
2787  break;
2788  case TopDepthReduce:
2789  Latency = Cand.SU->getDepth();
2790  break;
2791  case TopPathReduce:
2792  Latency = Cand.SU->getHeight();
2793  break;
2794  case BotHeightReduce:
2795  Latency = Cand.SU->getHeight();
2796  break;
2797  case BotPathReduce:
2798  Latency = Cand.SU->getDepth();
2799  break;
2800  }
2801  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2802  if (P.isValid())
2803  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2804  << ":" << P.getUnitInc() << " ";
2805  else
2806  dbgs() << " ";
2807  if (ResIdx)
2808  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2809  else
2810  dbgs() << " ";
2811  if (Latency)
2812  dbgs() << " " << Latency << " cycles ";
2813  else
2814  dbgs() << " ";
2815  dbgs() << '\n';
2816 }
2817 #endif
2818 
2819 namespace llvm {
2820 /// Return true if this heuristic determines order.
2821 /// TODO: Consider refactoring the return type of these functions to an integer or enum,
2822 /// as we may need to differentiate whether TryCand is better than Cand.
2823 bool tryLess(int TryVal, int CandVal,
2824  GenericSchedulerBase::SchedCandidate &TryCand,
2825  GenericSchedulerBase::SchedCandidate &Cand,
2826  GenericSchedulerBase::CandReason Reason) {
2827  if (TryVal < CandVal) {
2828  TryCand.Reason = Reason;
2829  return true;
2830  }
2831  if (TryVal > CandVal) {
2832  if (Cand.Reason > Reason)
2833  Cand.Reason = Reason;
2834  return true;
2835  }
2836  return false;
2837 }
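// E.g. tryLess(2, 5, TryCand, Cand, Stall) picks TryCand with reason Stall;
// tryLess(5, 2, ...) keeps Cand (possibly improving Cand.Reason to Stall);
// equal values return false, so the caller falls through to the next
// heuristic.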
2838 
2839 bool tryGreater(int TryVal, int CandVal,
2840  GenericSchedulerBase::SchedCandidate &TryCand,
2841  GenericSchedulerBase::SchedCandidate &Cand,
2842  GenericSchedulerBase::CandReason Reason) {
2843  if (TryVal > CandVal) {
2844  TryCand.Reason = Reason;
2845  return true;
2846  }
2847  if (TryVal < CandVal) {
2848  if (Cand.Reason > Reason)
2849  Cand.Reason = Reason;
2850  return true;
2851  }
2852  return false;
2853 }
2854 
2855 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2856  GenericSchedulerBase::SchedCandidate &Cand,
2857  SchedBoundary &Zone) {
2858  if (Zone.isTop()) {
2859  // Prefer the candidate with the lesser depth, but only if one of them has
2860  // depth greater than the total latency scheduled so far, otherwise either
2861  // of them could be scheduled now with no stall.
2862  if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
2863  Zone.getScheduledLatency()) {
2864  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2865  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2866  return true;
2867  }
2868  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2869  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2870  return true;
2871  } else {
2872  // Prefer the candidate with the lesser height, but only if one of them has
2873  // height greater than the total latency scheduled so far, otherwise either
2874  // of them could be scheduled now with no stall.
2875  if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
2876  Zone.getScheduledLatency()) {
2877  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2878  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2879  return true;
2880  }
2881  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2882  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2883  return true;
2884  }
2885  return false;
2886 }
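// E.g. in the top zone with a scheduled latency of 10 cycles, a TryCand at
// depth 12 loses to a Cand at depth 8 via TopDepthReduce; if both depths were
// at or below 10, the decision would fall through to the height comparison
// (TopPathReduce).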
2887 } // end namespace llvm
2888 
2889 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2890  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2891  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2892 }
2893 
2894 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2895  tracePick(Cand.Reason, Cand.AtTop);
2896 }
2897 
2898 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2899  assert(dag->hasVRegLiveness() &&
2900  "(PreRA)GenericScheduler needs vreg liveness");
2901  DAG = static_cast<ScheduleDAGMILive*>(dag);
2902  SchedModel = DAG->getSchedModel();
2903  TRI = DAG->TRI;
2904 
2905  if (RegionPolicy.ComputeDFSResult)
2906  DAG->computeDFSResult();
2907 
2908  Rem.init(DAG, SchedModel);
2909  Top.init(DAG, SchedModel, &Rem);
2910  Bot.init(DAG, SchedModel, &Rem);
2911 
2912  // Initialize resource counts.
2913 
2914  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2915  // are disabled, then these HazardRecs will be disabled.
2916  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2917  if (!Top.HazardRec) {
2918  Top.HazardRec =
2919  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2920  Itin, DAG);
2921  }
2922  if (!Bot.HazardRec) {
2923  Bot.HazardRec =
2924  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2925  Itin, DAG);
2926  }
2927  TopCand.SU = nullptr;
2928  BotCand.SU = nullptr;
2929 }
2930 
2931 /// Initialize the per-region scheduling policy.
2932 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2933  MachineBasicBlock::iterator End,
2934  unsigned NumRegionInstrs) {
2935  const MachineFunction &MF = *Begin->getMF();
2936  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2937 
2938  // Avoid setting up the register pressure tracker for small regions to save
2939  // compile time. As a rough heuristic, only track pressure when the number of
2940  // schedulable instructions exceeds half the integer register file.
2941  RegionPolicy.ShouldTrackPressure = true;
2942  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2943  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2944  if (TLI->isTypeLegal(LegalIntVT)) {
2945  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2946  TLI->getRegClassFor(LegalIntVT));
2947  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2948  }
2949  }
2950 
2951  // For generic targets, we default to bottom-up, because it's simpler and more
2952  // compile-time optimizations have been implemented in that direction.
2953  RegionPolicy.OnlyBottomUp = true;
2954 
2955  // Allow the subtarget to override default policy.
2956  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2957 
2958  // After subtarget overrides, apply command line options.
2959  if (!EnableRegPressure) {
2960  RegionPolicy.ShouldTrackPressure = false;
2961  RegionPolicy.ShouldTrackLaneMasks = false;
2962  }
2963 
2964  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2965  // e.g. -misched-bottomup=false allows scheduling in both directions.
2966  assert((!ForceTopDown || !ForceBottomUp) &&
2967  "-misched-topdown incompatible with -misched-bottomup");
2968  if (ForceBottomUp.getNumOccurrences() > 0) {
2969  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2970  if (RegionPolicy.OnlyBottomUp)
2971  RegionPolicy.OnlyTopDown = false;
2972  }
2973  if (ForceTopDown.getNumOccurrences() > 0) {
2974  RegionPolicy.OnlyTopDown = ForceTopDown;
2975  if (RegionPolicy.OnlyTopDown)
2976  RegionPolicy.OnlyBottomUp = false;
2977  }
2978 }
2979 
2980 void GenericScheduler::dumpPolicy() const {
2981  // Cannot completely remove virtual function even in release mode.
2982 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2983  dbgs() << "GenericScheduler RegionPolicy: "
2984  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2985  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2986  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2987  << "\n";
2988 #endif
2989 }
2990 
2991 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2992 /// critical path by more cycles than it takes to drain the instruction buffer.
2993 /// We estimate an upper bound on in-flight instructions as:
2994 ///
2995 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2996 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2997 /// InFlightResources = InFlightIterations * LoopResources
2998 ///
2999 /// TODO: Check execution resources in addition to IssueCount.
3000 void GenericScheduler::checkAcyclicLatency() {
3001  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3002  return;
3003 
3004  // Scaled number of cycles per loop iteration.
3005  unsigned IterCount =
3006  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3007  Rem.RemIssueCount);
3008  // Scaled acyclic critical path.
3009  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3010  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3011  unsigned InFlightCount =
3012  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3013  unsigned BufferLimit =
3014  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3015 
3016  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3017 
3018  LLVM_DEBUG(
3019  dbgs() << "IssueCycles="
3020  << Rem.RemIssueCount / SchedModel->getIssueWidth() << "c "
3021  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3022  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3023  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3024  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3025  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3026 }
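// Worked example with made-up numbers: if CyclicCritPath * LatencyFactor = 40,
// RemIssueCount = 60 and AcyclicCount = 120, then IterCount = max(40, 60) = 60,
// InFlightCount = ceil(120 * 60 / 60) = 120, and with a buffer limit of
// MicroOpBufferSize * MicroOpFactor = 64 the loop is flagged as acyclic
// latency limited (120 > 64).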
3027 
3028 void GenericScheduler::registerRoots() {
3029  Rem.CriticalPath = DAG->ExitSU.getDepth();
3030 
3031  // Some roots may not feed into ExitSU. Check all of them in case.
3032  for (const SUnit *SU : Bot.Available) {
3033  if (SU->getDepth() > Rem.CriticalPath)
3034  Rem.CriticalPath = SU->getDepth();
3035  }
3036  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3037  if (DumpCriticalPathLength) {
3038  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3039  }
3040 
3041  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3042  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3043  checkAcyclicLatency();
3044  }
3045 }
3046 
3047 namespace llvm {
3048 bool tryPressure(const PressureChange &TryP,
3049  const PressureChange &CandP,
3050  GenericSchedulerBase::SchedCandidate &TryCand,
3051  GenericSchedulerBase::SchedCandidate &Cand,
3052  GenericSchedulerBase::CandReason Reason,
3053  const TargetRegisterInfo *TRI,
3054  const MachineFunction &MF) {
3055  // If one candidate decreases and the other increases, go with it.
3056  // Invalid candidates have UnitInc==0.
3057  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3058  Reason)) {
3059  return true;
3060  }
3061  // Do not compare the magnitude of pressure changes between top and bottom
3062  // boundary.
3063  if (Cand.AtTop != TryCand.AtTop)
3064  return false;
3065 
3066  // If both candidates affect the same set in the same boundary, go with the
3067  // smallest increase.
3068  unsigned TryPSet = TryP.getPSetOrMax();
3069  unsigned CandPSet = CandP.getPSetOrMax();
3070  if (TryPSet == CandPSet) {
3071  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3072  Reason);
3073  }
3074 
3075  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3076  std::numeric_limits<int>::max();
3077 
3078  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3079  std::numeric_limits<int>::max();
3080 
3081  // If the candidates are decreasing pressure, reverse priority.
3082  if (TryP.getUnitInc() < 0)
3083  std::swap(TryRank, CandRank);
3084  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3085 }
3086 
3087 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3088  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3089 }
3090 
3091 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3092 /// their physreg def/use.
3093 ///
3094 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3095 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3096 /// with the operation that produces or consumes the physreg. We'll do this when
3097 /// regalloc has support for parallel copies.
3098 int biasPhysReg(const SUnit *SU, bool isTop) {
3099  const MachineInstr *MI = SU->getInstr();
3100 
3101  if (MI->isCopy()) {
3102  unsigned ScheduledOper = isTop ? 1 : 0;
3103  unsigned UnscheduledOper = isTop ? 0 : 1;
3104  // If we have already scheduled the physreg producer/consumer, immediately
3105  // schedule the copy.
3106  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
3107  return 1;
3108  // If the physreg is at the boundary, defer it. Otherwise schedule it
3109  // immediately to free the dependent. We can hoist the copy later.
3110  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3111  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
3112  return AtBoundary ? -1 : 1;
3113  }
3114 
3115  if (MI->isMoveImmediate()) {
3116  // If we have a move immediate and all successors have been assigned, bias
3117  // towards scheduling this later. Make sure all register defs are to
3118  // physical registers.
3119  bool DoBias = true;
3120  for (const MachineOperand &Op : MI->defs()) {
3121  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
3122  DoBias = false;
3123  break;
3124  }
3125  }
3126 
3127  if (DoBias)
3128  return isTop ? -1 : 1;
3129  }
3130 
3131  return 0;
3132 }
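// E.g. when scheduling top-down, a copy such as "%v = COPY $physreg" (reading
// a physreg argument) returns 1 so it issues right after the physreg def,
// while "$physreg = COPY %v" returns -1 only when the copy has no remaining
// successors in the region (it sits at the boundary), and 1 otherwise.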
3133 } // end namespace llvm
3134 
3135 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3136  bool AtTop,
3137  const RegPressureTracker &RPTracker,
3138  RegPressureTracker &TempTracker) {
3139  Cand.SU = SU;
3140  Cand.AtTop = AtTop;
3141  if (DAG->isTrackingPressure()) {
3142  if (AtTop) {
3143  TempTracker.getMaxDownwardPressureDelta(
3144  Cand.SU->getInstr(),
3145  Cand.RPDelta,
3146  DAG->getRegionCriticalPSets(),
3147  DAG->getRegPressure().MaxSetPressure);
3148  } else {
3149  if (VerifyScheduling) {
3150  TempTracker.getMaxUpwardPressureDelta(
3151  Cand.SU->getInstr(),
3152  &DAG->getPressureDiff(Cand.SU),
3153  Cand.RPDelta,
3154  DAG->getRegionCriticalPSets(),
3155  DAG->getRegPressure().MaxSetPressure);
3156  } else {
3157  RPTracker.getUpwardPressureDelta(
3158  Cand.SU->getInstr(),
3159  DAG->getPressureDiff(Cand.SU),
3160  Cand.RPDelta,
3161  DAG->getRegionCriticalPSets(),
3162  DAG->getRegPressure().MaxSetPressure);
3163  }
3164  }
3165  }
3166  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3167  << " Try SU(" << Cand.SU->NodeNum << ") "
3168  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3169  << Cand.RPDelta.Excess.getUnitInc() << "\n");
3170 }
3171 
3172 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3173 /// hierarchical. This may be more efficient than a graduated cost model because
3174 /// we don't need to evaluate all aspects of the model for each node in the
3175 /// queue. But it's really done to make the heuristics easier to debug and
3176 /// statistically analyze.
3177 ///
3178 /// \param Cand provides the policy and current best candidate.
3179 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3180 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3181 /// if Cand is from a different zone than TryCand.
3182 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3183 bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3184  SchedCandidate &TryCand,
3185  SchedBoundary *Zone) const {
3186  // Initialize the candidate if needed.
3187  if (!Cand.isValid()) {
3188  TryCand.Reason = NodeOrder;
3189  return true;
3190  }
3191 
3192  // Bias PhysReg Defs and copies to their uses and defs, respectively.
3193  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3194  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3195  return TryCand.Reason != NoCand;
3196 
3197  // Avoid exceeding the target's limit.
3198  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3199  Cand.RPDelta.Excess,
3200  TryCand, Cand, RegExcess, TRI,
3201  DAG->MF))
3202  return TryCand.Reason != NoCand;
3203 
3204  // Avoid increasing the max critical pressure in the scheduled region.
3205  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3206  Cand.RPDelta.CriticalMax,
3207  TryCand, Cand, RegCritical, TRI,
3208  DAG->MF))
3209  return TryCand.Reason != NoCand;
3210 
3211  // We only compare a subset of features when comparing nodes between
3212  // Top and Bottom boundary. Some properties are simply incomparable; in many
3213  // other instances we should only override the other boundary if something
3214  // is a clear good pick on one boundary. Skip heuristics that are more
3215  // "tie-breaking" in nature.
3216  bool SameBoundary = Zone != nullptr;
3217  if (SameBoundary) {
3218  // For loops that are acyclic path limited, aggressively schedule for
3219  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3220  // heuristics to take precedence.
3221  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3222  tryLatency(TryCand, Cand, *Zone))
3223  return TryCand.Reason != NoCand;
3224 
3225  // Prioritize instructions that read unbuffered resources by stall cycles.
3226  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3227  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3228  return TryCand.Reason != NoCand;
3229  }
3230 
3231  // Keep clustered nodes together to encourage downstream peephole
3232  // optimizations which may reduce resource requirements.
3233  //
3234  // This is a best effort to set things up for a post-RA pass. Optimizations
3235  // like generating loads of multiple registers should ideally be done within
3236  // the scheduler pass by combining the loads during DAG postprocessing.
3237  const SUnit *CandNextClusterSU =
3238  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3239  const SUnit *TryCandNextClusterSU =
3240  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3241  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3242  Cand.SU == CandNextClusterSU,
3243  TryCand, Cand, Cluster))
3244  return TryCand.Reason != NoCand;
3245 
3246  if (SameBoundary) {
3247  // Weak edges are for clustering and other constraints.
3248  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3249  getWeakLeft(Cand.SU, Cand.AtTop),
3250  TryCand, Cand, Weak))
3251  return TryCand.Reason != NoCand;
3252  }
3253 
3254  // Avoid increasing the max pressure of the entire region.
3255  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3256  Cand.RPDelta.CurrentMax,
3257  TryCand, Cand, RegMax, TRI,
3258  DAG->MF))
3259  return TryCand.Reason != NoCand;
3260 
3261  if (SameBoundary) {
3262  // Avoid critical resource consumption and balance the schedule.
3263  TryCand.initResourceDelta(DAG, SchedModel);
3264  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3265  TryCand, Cand, ResourceReduce))
3266  return TryCand.Reason != NoCand;
3267  if (tryGreater(TryCand.ResDelta.DemandedResources,
3268  Cand.ResDelta.DemandedResources,
3269  TryCand, Cand, ResourceDemand))
3270  return TryCand.Reason != NoCand;
3271 
3272  // Avoid serializing long latency dependence chains.
3273  // For acyclic path limited loops, latency was already checked above.
3274  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3275  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3276  return TryCand.Reason != NoCand;
3277 
3278  // Fall through to original instruction order.
3279  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3280  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3281  TryCand.Reason = NodeOrder;
3282  return true;
3283  }
3284  }
3285 
3286  return false;
3287 }
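// Taken together, the heuristics above give roughly this priority order within
// one boundary: PhysReg biasing, excess register pressure, critical pressure,
// (acyclic latency), stalls, clustering, weak edges, max pressure, resource
// reduction/demand, latency, and finally the original node order.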
3288 
3289 /// Pick the best candidate from the queue.
3290 ///
3291 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3292 /// DAG building. To adjust for the current scheduling location we need to
3293 /// maintain the number of vreg uses remaining to be top-scheduled.
3294 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3295  const CandPolicy &ZonePolicy,
3296  const RegPressureTracker &RPTracker,
3297  SchedCandidate &Cand) {
3298  // getMaxPressureDelta temporarily modifies the tracker.
3299  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3300 
3301  ReadyQueue &Q = Zone.Available;
3302  for (SUnit *SU : Q) {
3303 
3304  SchedCandidate TryCand(ZonePolicy);
3305  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3306  // Pass SchedBoundary only when comparing nodes from the same boundary.
3307  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3308  if (tryCandidate(Cand, TryCand, ZoneArg)) {
3309  // Initialize resource delta if needed in case future heuristics query it.
3310  if (TryCand.ResDelta == SchedResourceDelta())
3311  TryCand.initResourceDelta(DAG, SchedModel);
3312  Cand.setBest(TryCand);
3313  LLVM_DEBUG(traceCandidate(Cand));
3314  }
3315  }
3316 }
3317 
3318 /// Pick the best candidate node from either the top or bottom queue.
3319 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3320  // Schedule as far as possible in the direction of no choice. This is most
3321  // efficient, but also provides the best heuristics for CriticalPSets.
3322  if (SUnit *SU = Bot.pickOnlyChoice()) {
3323  IsTopNode = false;
3324  tracePick(Only1, false);
3325  return SU;
3326  }
3327  if (SUnit *SU = Top.pickOnlyChoice()) {
3328  IsTopNode = true;
3329  tracePick(Only1, true);
3330  return SU;
3331  }
3332  // Set the bottom-up policy based on the state of the current bottom zone and
3333  // the instructions outside the zone, including the top zone.
3334  CandPolicy BotPolicy;
3335  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3336  // Set the top-down policy based on the state of the current top zone and
3337  // the instructions outside the zone, including the bottom zone.
3338  CandPolicy TopPolicy;
3339  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3340 
3341  // See if BotCand is still valid (because we previously scheduled from Top).
3342  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3343  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3344  BotCand.Policy != BotPolicy) {
3345  BotCand.reset(CandPolicy());
3346  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3347  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3348  } else {
3349  LLVM_DEBUG(traceCandidate(BotCand));
3350 #ifndef NDEBUG
3351  if (VerifyScheduling) {
3352  SchedCandidate TCand;
3353  TCand.reset(CandPolicy());
3354  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3355  assert(TCand.SU == BotCand.SU &&
3356  "Last pick result should correspond to re-picking right now");
3357  }
3358 #endif
3359  }
3360 
3361  // Check if the top Q has a better candidate.
3362  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3363  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3364  TopCand.Policy != TopPolicy) {
3365  TopCand.reset(CandPolicy());
3366  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3367  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3368  } else {
3369  LLVM_DEBUG(traceCandidate(TopCand));
3370 #ifndef NDEBUG
3371  if (VerifyScheduling) {
3372  SchedCandidate TCand;
3373  TCand.reset(CandPolicy());
3374  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3375  assert(TCand.SU == TopCand.SU &&
3376  "Last pick result should correspond to re-picking right now");
3377  }
3378 #endif
3379  }
3380 
3381  // Pick best from BotCand and TopCand.
3382  assert(BotCand.isValid());
3383  assert(TopCand.isValid());
3384  SchedCandidate Cand = BotCand;
3385  TopCand.Reason = NoCand;
3386  if (tryCandidate(Cand, TopCand, nullptr)) {
3387  Cand.setBest(TopCand);
3388  LLVM_DEBUG(traceCandidate(Cand));
3389  }
3390 
3391  IsTopNode = Cand.AtTop;
3392  tracePick(Cand);
3393  return Cand.SU;
3394 }
3395 
3396 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3397 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3398  if (DAG->top() == DAG->bottom()) {
3399  assert(Top.Available.empty() && Top.Pending.empty() &&
3400  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3401  return nullptr;
3402  }
3403  SUnit *SU;
3404  do {
3405  if (RegionPolicy.OnlyTopDown) {
3406  SU = Top.pickOnlyChoice();
3407  if (!SU) {
3408  CandPolicy NoPolicy;
3409  TopCand.reset(NoPolicy);
3410  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3411  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3412  tracePick(TopCand);
3413  SU = TopCand.SU;
3414  }
3415  IsTopNode = true;
3416  } else if (RegionPolicy.OnlyBottomUp) {
3417  SU = Bot.pickOnlyChoice();
3418  if (!SU) {
3419  CandPolicy NoPolicy;
3420  BotCand.reset(NoPolicy);
3421  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3422  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3423  tracePick(BotCand);
3424  SU = BotCand.SU;
3425  }
3426  IsTopNode = false;
3427  } else {
3428  SU = pickNodeBidirectional(IsTopNode);
3429  }
3430  } while (SU->isScheduled);
3431 
3432  if (SU->isTopReady())
3433  Top.removeReady(SU);
3434  if (SU->isBottomReady())
3435  Bot.removeReady(SU);
3436 
3437  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3438  << *SU->getInstr());
3439  return SU;
3440 }
3441 
3442 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3443  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3444  if (!isTop)
3445  ++InsertPos;
3446  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3447 
3448  // Find already scheduled copies with a single physreg dependence and move
3449  // them just above the scheduled instruction.
3450  for (SDep &Dep : Deps) {
3451  if (Dep.getKind() != SDep::Data ||
3452  !Register::isPhysicalRegister(Dep.getReg()))
3453  continue;
3454  SUnit *DepSU = Dep.getSUnit();
3455  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3456  continue;
3457  MachineInstr *Copy = DepSU->getInstr();
3458  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3459  continue;
3460  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3461  DAG->dumpNode(*Dep.getSUnit()));
3462  DAG->moveInstruction(Copy, InsertPos);
3463  }
3464 }
3465 
3466 /// Update the scheduler's state after scheduling a node. This is the same node
3467 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3468  /// update its state based on the current cycle before MachineSchedStrategy
3469 /// does.
3470 ///
3471 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3472 /// them here. See comments in biasPhysReg.
3473 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3474  if (IsTopNode) {
3475  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3476  Top.bumpNode(SU);
3477  if (SU->hasPhysRegUses)
3478  reschedulePhysReg(SU, true);
3479  } else {
3480  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3481  Bot.bumpNode(SU);
3482  if (SU->hasPhysRegDefs)
3483  reschedulePhysReg(SU, false);
3484  }
3485 }
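Editor's note: for orientation, the pickNode/schedNode callbacks above are exercised by the scheduler's main loop (ScheduleDAGMI::schedule and ScheduleDAGMILive::schedule in this file). A simplified sketch of that contract, not the actual driver implementation:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Simplified sketch: ask the strategy for a node, let the DAG move the
// instruction (scheduleMI/moveInstruction in the real code), then tell the
// strategy so it can advance its per-zone cycle state.
static void runStrategyLoopSketch(MachineSchedStrategy &Strategy) {
  bool IsTopNode = false;
  while (SUnit *SU = Strategy.pickNode(IsTopNode)) {
    // ...the real driver moves SU's MachineInstr to the top or bottom of the
    // unscheduled zone here and updates liveness/pressure tracking...
    Strategy.schedNode(SU, IsTopNode);
  }
}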
3486 
3487 /// Create the standard converging machine scheduler. This will be used as the
3488 /// default scheduler if the target does not set a default.
3489 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3490  ScheduleDAGMILive *DAG =
3491  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3492  // Register DAG post-processors.
3493  //
3494  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3495  // data and pass it to later mutations. Have a single mutation that gathers
3496  // the interesting nodes in one pass.
3497  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3498  return DAG;
3499 }
3500 
3502  return createGenericSchedLive(C);
3503 }
3504 
3505 static MachineSchedRegistry
3506 GenericSchedRegistry("converge", "Standard converging scheduler.",
3508 
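Editor's note: targets and plugins can hook into the same MachineSchedRegistry mechanism used by the "converge" registration above, which makes a strategy selectable on the command line (via -misched=<name>). A hedged sketch, where MyStrategy is replaced by the stock GenericScheduler and the names createMySched/MySchedRegistry/"my-sched" are hypothetical:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

// Hypothetical factory: wrap a strategy in the live-interval-preserving DAG,
// mirroring createGenericSchedLive above. A real out-of-tree scheduler would
// substitute its own MachineSchedStrategy subclass here.
static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
}

// Registering the factory makes it selectable with -misched=my-sched.
static MachineSchedRegistry
MySchedRegistry("my-sched", "Hypothetical example scheduler.", createMySched);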
3509 //===----------------------------------------------------------------------===//
3510 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3511 //===----------------------------------------------------------------------===//
3512 
3513 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3514  DAG = Dag;
3515  SchedModel = DAG->getSchedModel();
3516  TRI = DAG->TRI;
3517 
3518  Rem.init(DAG, SchedModel);
3519  Top.init(DAG, SchedModel, &Rem);
3520  BotRoots.clear();
3521 
3522  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3523  // or are disabled, then these HazardRecs will be disabled.
3524  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3525  if (!Top.HazardRec) {
3526  Top.HazardRec =
3527  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3528  Itin, DAG);
3529  }
3530 }
3531 
3532 void PostGenericScheduler::registerRoots() {
3533  Rem.CriticalPath = DAG->ExitSU.getDepth();
3534 
3535  // Some roots may not feed into ExitSU. Check all of them in case.
3536  for (const SUnit *SU : BotRoots) {
3537  if (SU->getDepth() > Rem.CriticalPath)
3538  Rem.CriticalPath = SU->getDepth();
3539  }
3540  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3541  if (DumpCriticalPathLength) {
3542  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3543  }
3544 }
3545 
3546 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3547 ///
3548 /// \param Cand provides the policy and current best candidate.
3549 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3550 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3551 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3552  SchedCandidate &TryCand) {
3553  // Initialize the candidate if needed.
3554  if (!Cand.isValid()) {
3555  TryCand.Reason = NodeOrder;
3556  return true;
3557  }
3558 
3559  // Prioritize instructions that read unbuffered resources by stall cycles.
3560  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3561  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3562  return TryCand.Reason != NoCand;
3563 
3564  // Keep clustered nodes together.
3565  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3566  Cand.SU == DAG->getNextClusterSucc(),
3567  TryCand, Cand, Cluster))
3568  return TryCand.Reason != NoCand;
3569 
3570  // Avoid critical resource consumption and balance the schedule.
3571  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3572  TryCand, Cand, ResourceReduce))
3573  return TryCand.Reason != NoCand;
3574  if (tryGreater(TryCand.ResDelta.DemandedResources,
3575  Cand.ResDelta.DemandedResources,
3576  TryCand, Cand, ResourceDemand))
3577  return TryCand.Reason != NoCand;
3578 
3579  // Avoid serializing long latency dependence chains.
3580  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3581  return TryCand.Reason != NoCand;
3582  }
3583 
3584  // Fall through to original instruction order.
3585  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3586  TryCand.Reason = NodeOrder;
3587  return true;
3588  }
3589 
3590  return false;
3591 }
3592 
3593 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3594  ReadyQueue &Q = Top.Available;
3595  for (SUnit *SU : Q) {
3596  SchedCandidate TryCand(Cand.Policy);
3597  TryCand.SU = SU;
3598  TryCand.AtTop = true;
3599  TryCand.initResourceDelta(DAG, SchedModel);
3600  if (tryCandidate(Cand, TryCand)) {
3601  Cand.setBest(TryCand);
3602  LLVM_DEBUG(traceCandidate(Cand));
3603  }
3604  }
3605 }
3606 
3607 /// Pick the next node to schedule.
3608 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3609  if (DAG->top() == DAG->bottom()) {
3610  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3611  return nullptr;
3612  }
3613  SUnit *SU;
3614  do {
3615  SU = Top.pickOnlyChoice();
3616  if (SU) {
3617  tracePick(Only1, true);
3618  } else {
3619  CandPolicy NoPolicy;
3620  SchedCandidate TopCand(NoPolicy);
3621  // Set the top-down policy based on the state of the current top zone and
3622  // the instructions outside the zone, including the bottom zone.
3623  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3624  pickNodeFromQueue(TopCand);
3625  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3626  tracePick(TopCand);
3627  SU = TopCand.SU;
3628  }
3629  } while (SU->isScheduled);
3630 
3631  IsTopNode = true;
3632  Top.removeReady(SU);
3633 
3634  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3635  << *SU->getInstr());
3636  return SU;
3637 }
3638 
3639 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3640 /// scheduled/remaining flags in the DAG nodes.
3641 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3642  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3643  Top.bumpNode(SU);
3644 }
3645 
3646 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3647  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3648  /*RemoveKillFlags=*/true);
3649 }
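Editor's note: a target normally reaches this factory through its subtarget hooks rather than calling it directly. The fragment below is a hedged sketch only: MySubtarget is a hypothetical name for a target's existing TargetSubtargetInfo subclass (which supplies the constructor and everything else), and it assumes the enablePostRAMachineScheduler()/createPostMachineScheduler() hooks on TargetSubtargetInfo.

// Fragment only; these overrides would live in an existing <Target>Subtarget.

// Opt the target in to the post-RA MachineScheduler pass.
bool MySubtarget::enablePostRAMachineScheduler() const { return true; }

// Hand back the generic post-RA scheduler built by the factory above.
ScheduleDAGInstrs *
MySubtarget::createPostMachineScheduler(MachineSchedContext *C) const {
  return createGenericSchedPostRA(C);
}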
3650 
3651 //===----------------------------------------------------------------------===//
3652 // ILP Scheduler. Currently for experimental analysis of heuristics.
3653 //===----------------------------------------------------------------------===//
3654 
3655 namespace {
3656 
3657 /// Order nodes by the ILP metric.
3658 struct ILPOrder {
3659  const SchedDFSResult *DFSResult = nullptr;
3660  const BitVector *ScheduledTrees = nullptr;
3661  bool MaximizeILP;
3662 
3663  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3664 
3665  /// Apply a less-than relation on node priority.
3666  ///
3667  /// (Return true if A comes after B in the Q.)
3668  bool operator()(const SUnit *A, const SUnit *B) const {
3669  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3670  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3671  if (SchedTreeA != SchedTreeB) {
3672  // Unscheduled trees have lower priority.
3673  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3674  return ScheduledTrees->test(SchedTreeB);
3675 
3676  // Trees with shallower connections have lower priority.
3677  if (DFSResult->getSubtreeLevel(SchedTreeA)
3678  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3679  return DFSResult->getSubtreeLevel(SchedTreeA)
3680  < DFSResult->getSubtreeLevel(SchedTreeB);
3681  }
3682  }
3683  if (MaximizeILP)
3684  return DFSResult->getILP(A) < DFSResult->getILP(B);
3685  else
3686  return DFSResult->getILP(A) > DFSResult->getILP(B);
3687  }
3688 };
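Editor's note: the comparator returns "A comes after B", i.e. a less-than over priority, which is the convention the standard heap algorithms expect: the element that compares greatest surfaces at the heap front. A small self-contained illustration of that convention, with plain ints standing in for SUnits:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  // With a less-than comparator, make_heap/pop_heap expose the element that
  // compares "greatest" first -- the same way ILPScheduler::pickNode below
  // pops its highest-priority SUnit from ReadyQ.
  std::vector<int> ReadyQ = {3, 9, 1, 7};
  auto Cmp = [](int A, int B) { return A < B; }; // "A comes after B in the Q"
  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
  int Best = ReadyQ.back();
  ReadyQ.pop_back();
  assert(Best == 9); // Highest-priority element comes out first.
  return 0;
}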
3689 
3690 /// Schedule based on the ILP metric.
3691 class ILPScheduler : public MachineSchedStrategy {
3692  ScheduleDAGMILive *DAG = nullptr;
3693  ILPOrder Cmp;
3694 
3695  std::vector<SUnit*> ReadyQ;
3696 
3697 public:
3698  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3699 
3700  void initialize(ScheduleDAGMI *dag) override {
3701  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3702  DAG = static_cast<ScheduleDAGMILive*>(dag);
3703  DAG->computeDFSResult();
3704  Cmp.DFSResult = DAG->getDFSResult();
3705  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3706  ReadyQ.clear();
3707  }
3708 
3709  void registerRoots() override {
3710  // Restore the heap in ReadyQ with the updated DFS results.
3711  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3712  }
3713 
3714  /// Implement MachineSchedStrategy interface.
3715  /// -----------------------------------------
3716 
3717  /// Callback to select the highest priority node from the ready Q.
3718  SUnit *pickNode(bool &IsTopNode) override {
3719  if (ReadyQ.empty()) return nullptr;
3720  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3721  SUnit *SU = ReadyQ.back();
3722  ReadyQ.pop_back();
3723  IsTopNode = false;
3724  LLVM_DEBUG(dbgs() << "Pick node "
3725  << "SU(" << SU->NodeNum << ") "
3726  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3727  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3728  << " @"
3729  << DAG->getDFSResult()->getSubtreeLevel(
3730  DAG->getDFSResult()->getSubtreeID(SU))
3731  << '\n'
3732  << "Scheduling " << *SU->getInstr());
3733  return SU;
3734  }
3735 
3736  /// Scheduler callback to notify that a new subtree is scheduled.
3737  void scheduleTree(unsigned SubtreeID) override {
3738  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3739  }
3740 
3741  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3742  /// DFSResults, and resort the priority Q.
3743  void schedNode(SUnit *SU, bool IsTopNode) override {
3744  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3745  }
3746 
3747  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3748 
3749  void releaseBottomNode(SUnit *SU) override {
3750  ReadyQ.push_back(SU);
3751  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3752  }
3753 };
3754 
3755 } // end anonymous namespace
3756 
3757 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3758  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3759 }
3760 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3761  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3762 }
3763 
3765  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3767  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3768 
3769 //===----------------------------------------------------------------------===//
3770 // Machine Instruction Shuffler for Correctness Testing
3771 //===----------------------------------------------------------------------===//
3772 
3773 #ifndef NDEBUG
3774 namespace {
3775 
3776 /// Apply a less-than relation on the node order, which corresponds to the
3777 /// instruction order prior to scheduling. IsReverse implements greater-than.
3778 template<bool IsReverse>
3779 struct SUnitOrder {
3780  bool operator()(SUnit *A, SUnit *B) const {
3781  if (IsReverse)
3782  return A->NodeNum > B->NodeNum;
3783  else
3784  return A->NodeNum < B->NodeNum;
3785  }
3786 };
3787 
3788 /// Reorder instructions as much as possible.
3789 class InstructionShuffler : public MachineSchedStrategy {
3790  bool IsAlternating;
3791  bool IsTopDown;
3792 
3793  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3794  // gives nodes with a higher number higher priority causing the latest
3795  // instructions to be scheduled first.
3796  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3797  TopQ;
3798 
3799  // When scheduling bottom-up, use greater-than as the queue priority.
3800  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3801  BottomQ;
3802 
3803 public:
3804  InstructionShuffler(bool alternate, bool topdown)
3805  : IsAlternating(alternate), IsTopDown(topdown) {}
3806 
3807  void initialize(ScheduleDAGMI*) override {
3808  TopQ.clear();
3809  BottomQ.clear();
3810  }
3811 
3812  /// Implement MachineSchedStrategy interface.
3813  /// -----------------------------------------
3814 
3815  SUnit *pickNode(bool &IsTopNode) override {
3816  SUnit *SU;
3817  if (IsTopDown) {
3818  do {
3819  if (TopQ.empty()) return nullptr;
3820  SU = TopQ.top();
3821  TopQ.pop();
3822  } while (SU->isScheduled);
3823  IsTopNode = true;
3824  } else {
3825  do {
3826  if (BottomQ.empty()) return nullptr;
3827  SU = BottomQ.top();
3828  BottomQ.pop();
3829  } while (SU->isScheduled);
3830  IsTopNode = false;
3831  }
3832  if (IsAlternating)
3833  IsTopDown = !IsTopDown;
3834  return SU;
3835  }
3836 
3837  void schedNode(SUnit *SU, bool IsTopNode) override {}
3838 
3839  void releaseTopNode(SUnit *SU) override {
3840  TopQ.push(SU);
3841  }
3842  void releaseBottomNode(SUnit *SU) override {
3843  BottomQ.push(SU);
3844  }
3845 };
3846 
3847 } // end anonymous namespace
3848 
3849 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3850  bool Alternate = !ForceTopDown && !ForceBottomUp;
3851  bool TopDown = !ForceBottomUp;
3852  assert((TopDown || !ForceTopDown) &&
3853  "-misched-topdown incompatible with -misched-bottomup");
3854  return new ScheduleDAGMILive(
3855  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3856 }
3857 
3859  "shuffle", "Shuffle machine instructions alternating directions",
3861 #endif // !NDEBUG
3862 
3863 //===----------------------------------------------------------------------===//
3864 // GraphWriter support for ScheduleDAGMILive.
3865 //===----------------------------------------------------------------------===//
3866 
3867 #ifndef NDEBUG
3868 namespace llvm {
3869 
3870 template<> struct GraphTraits<
3871  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3872 
3873 template<>
3874 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3875  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3876 
3877  static std::string getGraphName(const ScheduleDAG *G) {
3878  return std::string(G->MF.getName());
3879  }
3880 
3881  static bool renderGraphFromBottomUp() {
3882  return true;
3883  }
3884 
3885  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
3886  if (ViewMISchedCutoff == 0)
3887  return false;
3888  return (Node->Preds.size() > ViewMISchedCutoff
3889  || Node->Succs.size() > ViewMISchedCutoff);
3890  }
3891 
3892  /// If you want to override the dot attributes printed for a particular
3893  /// edge, override this method.
3894  static std::string getEdgeAttributes(const SUnit *Node,
3895  SUnitIterator EI,
3896  const ScheduleDAG *Graph) {
3897  if (EI.isArtificialDep())
3898  return "color=cyan,style=dashed";
3899  if (EI.isCtrlDep())
3900  return "color=blue,style=dashed";
3901  return "";
3902  }
3903 
3904  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3905  std::string Str;
3906  raw_string_ostream SS(Str);
3907  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3908  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3909  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3910  SS << "SU:" << SU->NodeNum;
3911  if (DFS)
3912  SS << " I:" << DFS->getNumInstrs(SU);
3913  return SS.str();
3914  }
3915 
3916  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3917  return G->getGraphNodeLabel(SU);
3918  }
3919 
3920  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3921  std::string Str("shape=Mrecord");
3922  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3923  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3924  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3925  if (DFS) {
3926  Str += ",style=filled,fillcolor=\"#";
3927  Str += DOT::getColorString(DFS->getSubtreeID(N));
3928  Str += '"';
3929  }
3930  return Str;
3931  }
3932 };
3933 
3934 } // end namespace llvm
3935 #endif // NDEBUG
3936 
3937 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3938 /// rendered using 'dot'.
3939 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3940 #ifndef NDEBUG
3941  ViewGraph(this, Name, false, Title);
3942 #else
3943  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3944  << "systems with Graphviz or gv!\n";
3945 #endif // NDEBUG
3946 }
3947 
3948 /// Out-of-line implementation with no arguments is handy for gdb.
3949 void ScheduleDAGMI::viewGraph() {
3950  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3951 }
llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1087
llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:249
llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350
ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
i
i
Definition: README.txt:29
llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:895
llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:121
llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202
llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:75
llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:845
llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:96
llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:813
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:812
DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:459
llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:909
llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:594
llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:158
llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:133
llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1706
llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:585
MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:48
llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:944
llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:581
ScheduleDAG.h
llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:893
llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:3949
llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1493
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:506
llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:616
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:815
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:624
llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1406
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
TargetFrameLowering.h
llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:263
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:43
llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:544
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:560
llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:164
llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3877
llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:813
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34
createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3760
ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:128
MachinePassRegistry.h
llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:732
llvm::MachineSchedStrategy::initialize
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:617
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:725
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:277
llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:167
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:92
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:327
llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:813
llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:143
ScheduleDAGInstrs.h
llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:141
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:881
llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1005
llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2020
llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2102
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:136
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:622
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:119
llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:908
llvm::ForceTopDown
cl::opt< bool > ForceTopDown
llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:554
llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:231
llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342
ErrorHandling.h
llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:2839
llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:311
llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:873
llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3513
InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:1968
llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:813
llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
RegisterClassInfo.h
llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3183
MachineBasicBlock.h
llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:402
priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:296
llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3920
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1012
llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:702
llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
llvm::MemOp
Definition: TargetLowering.h:112
llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1200
llvm::ScheduleDAGMI::postprocessDAG
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:840
getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:487
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:2889
DenseMap.h
llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:2687
llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3028
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:333
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:207
TargetInstrInfo.h
llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:2898
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
llvm::RegisterMaskPair
Definition: RegisterPressure.h:39
llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:733
llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:178
llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)
llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:145
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:867
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1291
llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2344
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:457
EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1299
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:375
llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:279
llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:814
llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:751
STLExtras.h
llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:343
llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3135
llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:417
llvm::MachineSchedStrategy::releaseTopNode
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:125
nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:317
llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90
llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:637
llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:82
llvm::MachineSchedStrategy::schedNode
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:3894
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446
llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:721
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:280
llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:619
llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:665
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
RegisterPressure.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachineScheduler.cpp:75
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:226
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1396
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:117
MachineRegisterInfo.h
llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:567
llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:273
llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1512
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:377
llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:413
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:2980
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:581
llvm::tgtok::Dag
@ Dag
Definition: TGLexer.h:50
llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1183
CommandLine.h
llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:657
llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:420
TargetLowering.h
llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3442
MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:143
llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:702
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:201
llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleHazardRecognizer.h
llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:86
checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:1976
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1318
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:410
llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449
llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215
llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:1970
llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:242
EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:741
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1163
llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:99
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:282
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:2855
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3195
llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:813
llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:587
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:913
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::MachineSchedStrategy::releaseBottomNode
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:846
SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1794
llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:814
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:455
false
Definition: StackSlotColoring.cpp:142
llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:1985
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1115
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3646
llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:471
llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:822
llvm::Instruction
Definition: Instruction.h:45
ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:318
llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:707
createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3757
llvm::RegisterPressure::LiveOutRegs
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Definition: RegisterPressure.h:54
llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:359
llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:582
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:94
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:402
BitVector.h
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:707
llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:156
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:240
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:151
llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:137
llvm::BitVector
Definition: BitVector.h:74
llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
ViewMISchedDAGs
static cl::opt< bool > ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed"))
llvm::PostGenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3551
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:680
llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:562
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:83
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:52
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2595
llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:865
llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:157
llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes into the available queue.
Definition: MachineScheduler.cpp:2523
llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:117
llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:864
llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:408
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:2932
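As a sketch of the initPolicy hook (ExampleStrategy and the four-instruction threshold are assumptions made for illustration), a strategy derived from GenericScheduler can first apply the generic per-region policy and then tighten it:
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

class ExampleStrategy : public GenericScheduler {
public:
  ExampleStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override {
    // Start from the generic policy, then force tiny regions bottom-up.
    GenericScheduler::initPolicy(Begin, End, NumRegionInstrs);
    if (NumRegionInstrs < 4) {
      RegionPolicy.OnlyBottomUp = true;
      RegionPolicy.OnlyTopDown = false;
    }
  }
};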
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
Definition: RegisterPressure.cpp:570
llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
Definition: RegisterClassInfo.h:92
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:27
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
Definition: DOTGraphTraits.h:161
llvm::ScheduleDAGMI::NumInstrsScheduled
unsigned NumInstrsScheduled
The number of instructions scheduled so far.
Definition: MachineScheduler.h:288
llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has no successors.
Definition: ScheduleDAG.h:406
llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:387
Passes.h
llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)
llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:746
llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:911
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319
llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:556
llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:2823
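For illustration, this is the usual shape of a tryCandidate override built on the tryLess helper (ExampleStallStrategy and the decision to rank latency stalls first are assumptions, not this file's heuristic order):
#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

class ExampleStallStrategy : public GenericScheduler {
public:
  ExampleStallStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override {
    if (!Cand.isValid()) {
      TryCand.Reason = NodeOrder; // First candidate always wins.
      return true;
    }
    // Prefer the candidate with fewer latency stall cycles in this zone,
    // then defer to the generic heuristic chain.
    if (Zone && tryLess(Zone->getLatencyStallCycles(TryCand.SU),
                        Zone->getLatencyStallCycles(Cand.SU),
                        TryCand, Cand, Stall))
      return TryCand.Reason != NoCand;
    return GenericScheduler::tryCandidate(Cand, TryCand, Zone);
  }
};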
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:634
llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region.
Definition: RegisterPressure.h:464
llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:853
llvm::cl::opt< bool >
llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
Definition: MachineScheduler.h:860
llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:813
llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419
llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:269
llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
LiveDebugValues::DbgValue
Class recording the (high level) value of a variable.
Definition: InstrRefBasedImpl.h:225
llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx, or nullptr.
Definition: LiveInterval.h:421
TargetSchedule.h
llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:331
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:56
LiveIntervals.h
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:903
llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:825
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3489
llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:241
llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:618
llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineSchedStrategy::registerRoots
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.h:239
llvm::MachineSchedStrategy::scheduleTree
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
Definition: MachineScheduler.h:247
llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2040
llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:907
llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3048
llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:272
llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.cpp:3532
llvm::DenseMap
Definition: DenseMap.h:714
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:113
llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2074
ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph, provide a cutoff to hide them.
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
Definition: MachineScheduler.h:120
llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:319
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311
llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:948
llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:710
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:125
llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2331
llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:197
llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:3875
ArrayRef.h
llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:510
llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:533
TargetPassConfig.h
llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:308
llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:579
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:459
llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:391
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:241
llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:325
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:50
ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1344
llvm::RegisterPressure::LiveInRegs
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:53
llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:392
llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
dump the scheduled Sequence.
Definition: MachineScheduler.cpp:929
llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:30
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1196
llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:139
llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:270
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:861
llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition: MachineScheduler.h:842
iterator_range.h
llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:89
llvm::ScheduleDAG::dumpNodeAll
void dumpNodeAll(const SUnit &SU) const
Definition: ScheduleDAG.cpp:363