MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/InitializePasses.h"
52 #include "llvm/MC/LaneBitmask.h"
53 #include "llvm/Pass.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <iterator>
65 #include <limits>
66 #include <memory>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "machine-scheduler"
75 
76 STATISTIC(NumClustered, "Number of load/store pairs clustered");
77 
78 namespace llvm {
79 
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81  cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83  cl::desc("Force bottom-up list scheduling"));
85 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
86  cl::desc("Print critical path length to stdout"));
87 
89  "verify-misched", cl::Hidden,
90  cl::desc("Verify machine instrs before and after machine scheduling"));
91 
92 #ifndef NDEBUG
94  "view-misched-dags", cl::Hidden,
95  cl::desc("Pop up a window to show MISched dags after they are processed"));
96 cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
97  cl::desc("Print schedule DAGs"));
98 #else
99 const bool ViewMISchedDAGs = false;
100 const bool PrintDAGs = false;
101 #endif // NDEBUG
102 
103 } // end namespace llvm
104 
105 #ifndef NDEBUG
106 /// In some situations a few uninteresting nodes depend on nearly all other
107 /// nodes in the graph; provide a cutoff to hide them.
108 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
109  cl::desc("Hide nodes with more predecessors/successors than cutoff"));
110 
111 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
112  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
113 
114 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
115  cl::desc("Only schedule this function"));
116 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
117  cl::desc("Only schedule this MBB#"));
118 #endif // NDEBUG
119 
120 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
121 /// size of the ready lists.
122 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
123  cl::desc("Limit ready list to N instructions"), cl::init(256));
124 
125 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
126  cl::desc("Enable register pressure scheduling."), cl::init(true));
127 
128 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
129  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
130 
131 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
132  cl::desc("Enable memop clustering."),
133  cl::init(true));
134 static cl::opt<bool>
135  ForceFastCluster("force-fast-cluster", cl::Hidden,
136  cl::desc("Switch to fast cluster algorithm with the loss "
137  "of some fusion opportunities"),
138  cl::init(false));
139 static cl::opt<unsigned>
140  FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
141  cl::desc("The threshold for fast cluster"),
142  cl::init(1000));
143 
144 // DAG subtrees must have at least this many nodes.
145 static const unsigned MinSubtreeSize = 8;
146 
147 // Pin the vtables to this file.
148 void MachineSchedStrategy::anchor() {}
149 
150 void ScheduleDAGMutation::anchor() {}
151 
152 //===----------------------------------------------------------------------===//
153 // Machine Instruction Scheduling Pass and Registry
154 //===----------------------------------------------------------------------===//
155 
158 }
159 
161  delete RegClassInfo;
162 }
163 
164 namespace {
165 
166 /// Base class for a machine scheduler class that can run at any point.
167 class MachineSchedulerBase : public MachineSchedContext,
168  public MachineFunctionPass {
169 public:
170  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
171 
172  void print(raw_ostream &O, const Module* = nullptr) const override;
173 
174 protected:
175  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
176 };
177 
178 /// MachineScheduler runs after coalescing and before register allocation.
179 class MachineScheduler : public MachineSchedulerBase {
180 public:
181  MachineScheduler();
182 
183  void getAnalysisUsage(AnalysisUsage &AU) const override;
184 
185  bool runOnMachineFunction(MachineFunction&) override;
186 
187  static char ID; // Class identification, replacement for typeinfo
188 
189 protected:
190  ScheduleDAGInstrs *createMachineScheduler();
191 };
192 
193 /// PostMachineScheduler runs shortly before code emission.
194 class PostMachineScheduler : public MachineSchedulerBase {
195 public:
196  PostMachineScheduler();
197 
198  void getAnalysisUsage(AnalysisUsage &AU) const override;
199 
200  bool runOnMachineFunction(MachineFunction&) override;
201 
202  static char ID; // Class identification, replacement for typeinfo
203 
204 protected:
205  ScheduleDAGInstrs *createPostMachineScheduler();
206 };
207 
208 } // end anonymous namespace
209 
210 char MachineScheduler::ID = 0;
211 
213 
214 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
215  "Machine Instruction Scheduler", false, false)
221 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
223 
224 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
226 }
227 
228 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
229  AU.setPreservesCFG();
234  AU.addRequired<SlotIndexes>();
239 }
240 
241 char PostMachineScheduler::ID = 0;
242 
244 
245 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
246  "PostRA Machine Instruction Scheduler", false, false)
250 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
252 
253 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
255 }
256 
257 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
258  AU.setPreservesCFG();
264 }
265 
268 
269 /// A dummy default scheduler factory indicates whether the scheduler
270 /// is overridden on the command line.
272  return nullptr;
273 }
274 
275 /// MachineSchedOpt allows command line selection of the scheduler.
278 MachineSchedOpt("misched",
280  cl::desc("Machine instruction scheduler to use"));
281 
283 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
285 
287  "enable-misched",
288  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
289  cl::Hidden);
290 
292  "enable-post-misched",
293  cl::desc("Enable the post-ra machine instruction scheduling pass."),
294  cl::init(true), cl::Hidden);
295 
296 /// Decrement this iterator until reaching the top or a non-debug instr.
300  assert(I != Beg && "reached the top of the region, cannot decrement");
301  while (--I != Beg) {
302  if (!I->isDebugOrPseudoInstr())
303  break;
304  }
305  return I;
306 }
307 
308 /// Non-const version.
314 }
315 
316 /// If this iterator is a debug value, increment until reaching the End or a
317 /// non-debug instruction.
321  for(; I != End; ++I) {
322  if (!I->isDebugOrPseudoInstr())
323  break;
324  }
325  return I;
326 }
327 
328 /// Non-const version.
334 }
335 
336 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
337 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
338  // Select the scheduler, or set the default.
340  if (Ctor != useDefaultMachineSched)
341  return Ctor(this);
342 
343  // Get the default scheduler set by the target for this function.
344  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
345  if (Scheduler)
346  return Scheduler;
347 
348  // Default to GenericScheduler.
349  return createGenericSchedLive(this);
350 }
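
The lookup order above is: a scheduler selected on the command line via -misched, then the target's default from TargetPassConfig, and finally the generic scheduler. A minimal sketch of how an alternative scheduler could be made selectable through the same MachineSchedRegistry mechanism follows; the factory name createCustomSched and the "custom-sched" option value are hypothetical, and a real variant would supply its own strategy instead of reusing GenericScheduler.

static ScheduleDAGInstrs *createCustomSched(MachineSchedContext *C) {
  // Reuse the stock DAG driver and strategy; a real variant would plug in
  // its own MachineSchedStrategy subclass here.
  return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
}

static MachineSchedRegistry
    CustomSchedRegistry("custom-sched", "Illustrative scheduler variant",
                        createCustomSched);
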
351 
352 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
353 /// the caller. We don't have a command line option to override the postRA
354 /// scheduler. The Target must configure it.
355 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
356  // Get the postRA scheduler set by the target for this function.
357  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
358  if (Scheduler)
359  return Scheduler;
360 
361  // Default to GenericScheduler.
362  return createGenericSchedPostRA(this);
363 }
364 
365 /// Top-level MachineScheduler pass driver.
366 ///
367 /// Visit blocks in function order. Divide each block into scheduling regions
368 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
369 /// consistent with the DAG builder, which traverses the interior of the
370 /// scheduling regions bottom-up.
371 ///
372 /// This design avoids exposing scheduling boundaries to the DAG builder,
373 /// simplifying the DAG builder's support for "special" target instructions.
374 /// At the same time the design allows target schedulers to operate across
375 /// scheduling boundaries, for example to bundle the boundary instructions
376 /// without reordering them. This creates complexity, because the target
377 /// scheduler must update the RegionBegin and RegionEnd positions cached by
378 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
379 /// design would be to split blocks at scheduling boundaries, but LLVM has a
380 /// general bias against block splitting purely for implementation simplicity.
381 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
382  if (skipFunction(mf.getFunction()))
383  return false;
384 
385  if (EnableMachineSched.getNumOccurrences()) {
386  if (!EnableMachineSched)
387  return false;
388  } else if (!mf.getSubtarget().enableMachineScheduler())
389  return false;
390 
391  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
392 
393  // Initialize the context of the pass.
394  MF = &mf;
395  MLI = &getAnalysis<MachineLoopInfo>();
396  MDT = &getAnalysis<MachineDominatorTree>();
397  PassConfig = &getAnalysis<TargetPassConfig>();
398  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
399 
400  LIS = &getAnalysis<LiveIntervals>();
401 
402  if (VerifyScheduling) {
403  LLVM_DEBUG(LIS->dump());
404  MF->verify(this, "Before machine scheduling.");
405  }
406  RegClassInfo->runOnMachineFunction(*MF);
407 
408  // Instantiate the selected scheduler for this target, function, and
409  // optimization level.
410  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
411  scheduleRegions(*Scheduler, false);
412 
413  LLVM_DEBUG(LIS->dump());
414  if (VerifyScheduling)
415  MF->verify(this, "After machine scheduling.");
416  return true;
417 }
418 
419 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
420  if (skipFunction(mf.getFunction()))
421  return false;
422 
425  return false;
426  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
427  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
428  return false;
429  }
430  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
431 
432  // Initialize the context of the pass.
433  MF = &mf;
434  MLI = &getAnalysis<MachineLoopInfo>();
435  PassConfig = &getAnalysis<TargetPassConfig>();
436  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
437 
438  if (VerifyScheduling)
439  MF->verify(this, "Before post machine scheduling.");
440 
441  // Instantiate the selected scheduler for this target, function, and
442  // optimization level.
443  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
444  scheduleRegions(*Scheduler, true);
445 
446  if (VerifyScheduling)
447  MF->verify(this, "After post machine scheduling.");
448  return true;
449 }
450 
451 /// Return true if the given instruction should not be included in a scheduling
452 /// region.
453 ///
454 /// MachineScheduler does not currently support scheduling across calls. To
455 /// handle calls, the DAG builder needs to be modified to create register
456 /// anti/output dependencies on the registers clobbered by the call's regmask
457 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
458 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
459 /// the boundary, but there would be no benefit to postRA scheduling across
460 /// calls this late anyway.
463  MachineFunction *MF,
464  const TargetInstrInfo *TII) {
465  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
466 }
467 
468 /// A region of an MBB for scheduling.
469 namespace {
470 struct SchedRegion {
471  /// RegionBegin is the first instruction in the scheduling region, and
472  /// RegionEnd is either MBB->end() or the scheduling boundary after the
473  /// last instruction in the scheduling region. These iterators cannot refer
474  /// to instructions outside of the identified scheduling region because
475  /// those may be reordered before scheduling this region.
476  MachineBasicBlock::iterator RegionBegin;
477  MachineBasicBlock::iterator RegionEnd;
478  unsigned NumRegionInstrs;
479 
481  unsigned N) :
482  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
483 };
484 } // end anonymous namespace
485 
487 
488 static void
490  MBBRegionsVector &Regions,
491  bool RegionsTopDown) {
492  MachineFunction *MF = MBB->getParent();
493  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
494 
495  MachineBasicBlock::iterator I = nullptr;
496  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
497  RegionEnd != MBB->begin(); RegionEnd = I) {
498 
499  // Avoid decrementing RegionEnd for blocks with no terminator.
500  if (RegionEnd != MBB->end() ||
501  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
502  --RegionEnd;
503  }
504 
505  // The next region starts above the previous region. Look backward in the
506  // instruction stream until we find the nearest boundary.
507  unsigned NumRegionInstrs = 0;
508  I = RegionEnd;
509  for (;I != MBB->begin(); --I) {
510  MachineInstr &MI = *std::prev(I);
511  if (isSchedBoundary(&MI, &*MBB, MF, TII))
512  break;
513  if (!MI.isDebugOrPseudoInstr()) {
514  // MBB::size() uses instr_iterator to count. Here we need a bundle to
515  // count as a single instruction.
516  ++NumRegionInstrs;
517  }
518  }
519 
520  // It's possible we found a scheduling region that only has debug
521  // instructions. Don't bother scheduling these.
522  if (NumRegionInstrs != 0)
523  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
524  }
525 
526  if (RegionsTopDown)
527  std::reverse(Regions.begin(), Regions.end());
528 }
529 
530 /// Main driver for both MachineScheduler and PostMachineScheduler.
531 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
532  bool FixKillFlags) {
533  // Visit all machine basic blocks.
534  //
535  // TODO: Visit blocks in global postorder or postorder within the bottom-up
536  // loop tree. Then we can optionally compute global RegPressure.
537  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
538  MBB != MBBEnd; ++MBB) {
539 
540  Scheduler.startBlock(&*MBB);
541 
542 #ifndef NDEBUG
543  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
544  continue;
545  if (SchedOnlyBlock.getNumOccurrences()
546  && (int)SchedOnlyBlock != MBB->getNumber())
547  continue;
548 #endif
549 
550  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
551  // points to the scheduling boundary at the bottom of the region. The DAG
552  // does not include RegionEnd, but the region does (i.e. the next
553  // RegionEnd is above the previous RegionBegin). If the current block has
554  // no terminator then RegionEnd == MBB->end() for the bottom region.
555  //
556  // All the regions of MBB are first found and stored in MBBRegions, which
557  // will then be processed top-down within the MBB if RegionsTopDown is true.
558  //
559  // The Scheduler may insert instructions during either schedule() or
560  // exitRegion(), even for empty regions. So the local iterators 'I' and
561  // 'RegionEnd' are invalid across these calls. Instructions must not be
562  // added to other regions than the current one without updating MBBRegions.
563 
564  MBBRegionsVector MBBRegions;
565  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
566  for (const SchedRegion &R : MBBRegions) {
567  MachineBasicBlock::iterator I = R.RegionBegin;
568  MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
569  unsigned NumRegionInstrs = R.NumRegionInstrs;
570 
571  // Notify the scheduler of the region, even if we may skip scheduling
572  // it. Perhaps it still needs to be bundled.
573  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
574 
575  // Skip empty scheduling regions (0 or 1 schedulable instructions).
576  if (I == RegionEnd || I == std::prev(RegionEnd)) {
577  // Close the current region. Bundle the terminator if needed.
578  // This invalidates 'RegionEnd' and 'I'.
579  Scheduler.exitRegion();
580  continue;
581  }
582  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
583  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
584  << " " << MBB->getName() << "\n From: " << *I
585  << " To: ";
586  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
587  else dbgs() << "End\n";
588  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
589  if (DumpCriticalPathLength) {
590  errs() << MF->getName();
591  errs() << ":%bb. " << MBB->getNumber();
592  errs() << " " << MBB->getName() << " \n";
593  }
594 
595  // Schedule a region: possibly reorder instructions.
596  // This invalidates the original region iterators.
597  Scheduler.schedule();
598 
599  // Close the current region.
600  Scheduler.exitRegion();
601  }
602  Scheduler.finishBlock();
603  // FIXME: Ideally, no further passes should rely on kill flags. However,
604  // thumb2 size reduction is currently an exception, so the PostMIScheduler
605  // needs to do this.
606  if (FixKillFlags)
607  Scheduler.fixupKills(*MBB);
608  }
609  Scheduler.finalizeSchedule();
610 }
611 
612 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
613  // unimplemented
614 }
615 
616 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
618  dbgs() << "Queue " << Name << ": ";
619  for (const SUnit *SU : Queue)
620  dbgs() << SU->NodeNum << " ";
621  dbgs() << "\n";
622 }
623 #endif
624 
625 //===----------------------------------------------------------------------===//
626 // ScheduleDAGMI - Basic machine instruction scheduling. This is
627 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
628 // virtual registers.
629 //===----------------------------------------------------------------------===//
630 
631 // Provide a vtable anchor.
633 
634 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
635 /// NumPredsLeft reaches zero, release the successor node.
636 ///
637 /// FIXME: Adjust SuccSU height based on MinLatency.
638 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
639  SUnit *SuccSU = SuccEdge->getSUnit();
640 
641  if (SuccEdge->isWeak()) {
642  --SuccSU->WeakPredsLeft;
643  if (SuccEdge->isCluster())
644  NextClusterSucc = SuccSU;
645  return;
646  }
647 #ifndef NDEBUG
648  if (SuccSU->NumPredsLeft == 0) {
649  dbgs() << "*** Scheduling failed! ***\n";
650  dumpNode(*SuccSU);
651  dbgs() << " has been released too many times!\n";
652  llvm_unreachable(nullptr);
653  }
654 #endif
655  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
656  // CurrCycle may have advanced since then.
657  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
658  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
659 
660  --SuccSU->NumPredsLeft;
661  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
662  SchedImpl->releaseTopNode(SuccSU);
663 }
664 
665 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
667  for (SDep &Succ : SU->Succs)
668  releaseSucc(SU, &Succ);
669 }
670 
671 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
672 /// NumSuccsLeft reaches zero, release the predecessor node.
673 ///
674 /// FIXME: Adjust PredSU height based on MinLatency.
675 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
676  SUnit *PredSU = PredEdge->getSUnit();
677 
678  if (PredEdge->isWeak()) {
679  --PredSU->WeakSuccsLeft;
680  if (PredEdge->isCluster())
681  NextClusterPred = PredSU;
682  return;
683  }
684 #ifndef NDEBUG
685  if (PredSU->NumSuccsLeft == 0) {
686  dbgs() << "*** Scheduling failed! ***\n";
687  dumpNode(*PredSU);
688  dbgs() << " has been released too many times!\n";
689  llvm_unreachable(nullptr);
690  }
691 #endif
692  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
693  // CurrCycle may have advanced since then.
694  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
695  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
696 
697  --PredSU->NumSuccsLeft;
698  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
699  SchedImpl->releaseBottomNode(PredSU);
700 }
701 
702 /// releasePredecessors - Call releasePred on each of SU's predecessors.
704  for (SDep &Pred : SU->Preds)
705  releasePred(SU, &Pred);
706 }
707 
710  SchedImpl->enterMBB(bb);
711 }
712 
714  SchedImpl->leaveMBB();
716 }
717 
718 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
719 /// crossing a scheduling boundary. [begin, end) includes all instructions in
720 /// the region, including the boundary itself and single-instruction regions
721 /// that don't get scheduled.
725  unsigned regioninstrs)
726 {
727  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
728 
729  SchedImpl->initPolicy(begin, end, regioninstrs);
730 }
731 
732 /// This is normally called from the main scheduler loop but may also be invoked
733 /// by the scheduling strategy to perform additional code motion.
736  // Advance RegionBegin if the first instruction moves down.
737  if (&*RegionBegin == MI)
738  ++RegionBegin;
739 
740  // Update the instruction stream.
741  BB->splice(InsertPos, BB, MI);
742 
743  // Update LiveIntervals
744  if (LIS)
745  LIS->handleMove(*MI, /*UpdateFlags=*/true);
746 
747  // Recede RegionBegin if an instruction moves above the first.
748  if (RegionBegin == InsertPos)
749  RegionBegin = MI;
750 }
751 
753 #if LLVM_ENABLE_ABI_BREAKING_CHECKS && !defined(NDEBUG)
754  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
756  return false;
757  }
758  ++NumInstrsScheduled;
759 #endif
760  return true;
761 }
762 
763 /// Per-region scheduling driver, called back from
764 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
765 /// does not consider liveness or register pressure. It is useful for PostRA
766 /// scheduling and potentially other custom schedulers.
768  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
769  LLVM_DEBUG(SchedImpl->dumpPolicy());
770 
771  // Build the DAG.
773 
774  postprocessDAG();
775 
776  SmallVector<SUnit*, 8> TopRoots, BotRoots;
777  findRootsAndBiasEdges(TopRoots, BotRoots);
778 
779  LLVM_DEBUG(dump());
780  if (PrintDAGs) dump();
781  if (ViewMISchedDAGs) viewGraph();
782 
783  // Initialize the strategy before modifying the DAG.
784  // This may initialize a DFSResult to be used for queue priority.
785  SchedImpl->initialize(this);
786 
787  // Initialize ready queues now that the DAG and priority data are finalized.
788  initQueues(TopRoots, BotRoots);
789 
790  bool IsTopNode = false;
791  while (true) {
792  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
793  SUnit *SU = SchedImpl->pickNode(IsTopNode);
794  if (!SU) break;
795 
796  assert(!SU->isScheduled && "Node already scheduled");
797  if (!checkSchedLimit())
798  break;
799 
800  MachineInstr *MI = SU->getInstr();
801  if (IsTopNode) {
802  assert(SU->isTopReady() && "node still has unscheduled dependencies");
803  if (&*CurrentTop == MI)
805  else
807  } else {
808  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
811  if (&*priorII == MI)
812  CurrentBottom = priorII;
813  else {
814  if (&*CurrentTop == MI)
815  CurrentTop = nextIfDebug(++CurrentTop, priorII);
817  CurrentBottom = MI;
818  }
819  }
820  // Notify the scheduling strategy before updating the DAG.
821  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
822  // runs, it can then use the accurate ReadyCycle time to determine whether
823  // newly released nodes can move to the readyQ.
824  SchedImpl->schedNode(SU, IsTopNode);
825 
826  updateQueues(SU, IsTopNode);
827  }
828  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
829 
831 
832  LLVM_DEBUG({
833  dbgs() << "*** Final schedule for "
834  << printMBBReference(*begin()->getParent()) << " ***\n";
835  dumpSchedule();
836  dbgs() << '\n';
837  });
838 }
839 
840 /// Apply each ScheduleDAGMutation step in order.
842  for (auto &m : Mutations)
843  m->apply(this);
844 }
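
postprocessDAG runs each registered ScheduleDAGMutation in order, which is the hook the load/store clustering and copy-constraining mutations later in this file use. A minimal sketch of a custom mutation is shown below; the class name ToyMutation is hypothetical, and a target would normally install it from its createMachineScheduler hook with DAG->addMutation(std::make_unique<ToyMutation>()).

namespace {
// Hypothetical example: bias the strategy to keep the first two DAG nodes
// together by adding a weak cluster edge. Weak edges never force an ordering.
struct ToyMutation : public ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    DAG->addEdge(&DAG->SUnits[1], SDep(&DAG->SUnits[0], SDep::Cluster));
  }
};
} // end anonymous namespace
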
845 
846 void ScheduleDAGMI::
848  SmallVectorImpl<SUnit*> &BotRoots) {
849  for (SUnit &SU : SUnits) {
850  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
851 
852  // Order predecessors so DFSResult follows the critical path.
853  SU.biasCriticalPath();
854 
855  // A SUnit is ready to top schedule if it has no predecessors.
856  if (!SU.NumPredsLeft)
857  TopRoots.push_back(&SU);
858  // A SUnit is ready to bottom schedule if it has no successors.
859  if (!SU.NumSuccsLeft)
860  BotRoots.push_back(&SU);
861  }
863 }
864 
865 /// Identify DAG roots and setup scheduler queues.
867  ArrayRef<SUnit*> BotRoots) {
868  NextClusterSucc = nullptr;
869  NextClusterPred = nullptr;
870 
871  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
872  //
873  // Nodes with unreleased weak edges can still be roots.
874  // Release top roots in forward order.
875  for (SUnit *SU : TopRoots)
876  SchedImpl->releaseTopNode(SU);
877 
878  // Release bottom roots in reverse order so the higher priority nodes appear
879  // first. This is more natural and slightly more efficient.
881  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
882  SchedImpl->releaseBottomNode(*I);
883  }
884 
887 
888  SchedImpl->registerRoots();
889 
890  // Advance past initial DebugValues.
893 }
894 
895 /// Update scheduler queues after scheduling an instruction.
896 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
897  // Release dependent instructions for scheduling.
898  if (IsTopNode)
899  releaseSuccessors(SU);
900  else
902 
903  SU->isScheduled = true;
904 }
905 
906 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
908  // If first instruction was a DBG_VALUE then put it back.
909  if (FirstDbgValue) {
912  }
913 
914  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
915  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
916  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
917  MachineInstr *DbgValue = P.first;
918  MachineBasicBlock::iterator OrigPrevMI = P.second;
919  if (&*RegionBegin == DbgValue)
920  ++RegionBegin;
921  BB->splice(std::next(OrigPrevMI), BB, DbgValue);
922  if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
924  }
925 }
926 
927 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
929  for (MachineInstr &MI : *this) {
930  if (SUnit *SU = getSUnit(&MI))
931  dumpNode(*SU);
932  else
933  dbgs() << "Missing SUnit\n";
934  }
935 }
936 #endif
937 
938 //===----------------------------------------------------------------------===//
939 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
940 // preservation.
941 //===----------------------------------------------------------------------===//
942 
944  delete DFSResult;
945 }
946 
948  const MachineInstr &MI = *SU.getInstr();
949  for (const MachineOperand &MO : MI.operands()) {
950  if (!MO.isReg())
951  continue;
952  if (!MO.readsReg())
953  continue;
954  if (TrackLaneMasks && !MO.isUse())
955  continue;
956 
957  Register Reg = MO.getReg();
959  continue;
960 
961  // Ignore re-defs.
962  if (TrackLaneMasks) {
963  bool FoundDef = false;
964  for (const MachineOperand &MO2 : MI.operands()) {
965  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
966  FoundDef = true;
967  break;
968  }
969  }
970  if (FoundDef)
971  continue;
972  }
973 
974  // Record this local VReg use.
976  for (; UI != VRegUses.end(); ++UI) {
977  if (UI->SU == &SU)
978  break;
979  }
980  if (UI == VRegUses.end())
982  }
983 }
984 
985 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
986 /// crossing a scheduling boundary. [begin, end) includes all instructions in
987 /// the region, including the boundary itself and single-instruction regions
988 /// that don't get scheduled.
992  unsigned regioninstrs)
993 {
994  // ScheduleDAGMI initializes SchedImpl's per-region policy.
995  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
996 
997  // For convenience remember the end of the liveness region.
998  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
999 
1001 
1002  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1003  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1004 
1006  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1007 }
1008 
1009 // Setup the register pressure trackers for the top scheduled and bottom
1010 // scheduled regions.
1012  VRegUses.clear();
1014  for (SUnit &SU : SUnits)
1015  collectVRegUses(SU);
1016 
1018  ShouldTrackLaneMasks, false);
1020  ShouldTrackLaneMasks, false);
1021 
1022  // Close the RPTracker to finalize live ins.
1024 
1026 
1027  // Initialize the live ins and live outs.
1030 
1031  // Close one end of the tracker so we can call
1032  // getMaxUpward/DownwardPressureDelta before advancing across any
1033  // instructions. This converts currently live regs into live ins/outs.
1036 
1038  if (!BotRPTracker.getLiveThru().empty()) {
1040  LLVM_DEBUG(dbgs() << "Live Thru: ";
1042  };
1043 
1044  // For each live out vreg reduce the pressure change associated with other
1045  // uses of the same vreg below the live-out reaching def.
1047 
1048  // Account for liveness generated by the region boundary.
1049  if (LiveRegionEnd != RegionEnd) {
1051  BotRPTracker.recede(&LiveUses);
1052  updatePressureDiffs(LiveUses);
1053  }
1054 
1055  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1057  dbgs() << "Bottom Pressure:\n";
1059 
1061  (RegionEnd->isDebugInstr() &&
1063  "Can't find the region bottom");
1064 
1065  // Cache the list of excess pressure sets in this region. This will also track
1066  // the max pressure in the scheduled code for these sets.
1067  RegionCriticalPSets.clear();
1068  const std::vector<unsigned> &RegionPressure =
1070  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1071  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1072  if (RegionPressure[i] > Limit) {
1073  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1074  << " Actual " << RegionPressure[i] << "\n");
1075  RegionCriticalPSets.push_back(PressureChange(i));
1076  }
1077  }
1078  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1079  for (const PressureChange &RCPS
1081  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1082  dbgs() << "\n");
1083 }
1084 
1087  const std::vector<unsigned> &NewMaxPressure) {
1088  const PressureDiff &PDiff = getPressureDiff(SU);
1089  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1090  for (const PressureChange &PC : PDiff) {
1091  if (!PC.isValid())
1092  break;
1093  unsigned ID = PC.getPSet();
1094  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1095  ++CritIdx;
1096  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1097  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1098  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1099  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1100  }
1101  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1102  if (NewMaxPressure[ID] >= Limit - 2) {
1103  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1104  << NewMaxPressure[ID]
1105  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1106  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1107  << " livethru)\n");
1108  }
1109  }
1110 }
1111 
1112 /// Update the PressureDiff array for liveness after scheduling this
1113 /// instruction.
1115  ArrayRef<RegisterMaskPair> LiveUses) {
1116  for (const RegisterMaskPair &P : LiveUses) {
1117  Register Reg = P.RegUnit;
1118  /// FIXME: Currently assuming single-use physregs.
1120  continue;
1121 
1122  if (ShouldTrackLaneMasks) {
1123  // If the register has just become live then other uses won't change
1124  // this fact anymore => decrement pressure.
1125  // If the register has just become dead then other uses make it come
1126  // back to life => increment pressure.
1127  bool Decrement = P.LaneMask.any();
1128 
1129  for (const VReg2SUnit &V2SU
1131  SUnit &SU = *V2SU.SU;
1132  if (SU.isScheduled || &SU == &ExitSU)
1133  continue;
1134 
1135  PressureDiff &PDiff = getPressureDiff(&SU);
1136  PDiff.addPressureChange(Reg, Decrement, &MRI);
1137  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1138  << printReg(Reg, TRI) << ':'
1139  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1140  dbgs() << " to "; PDiff.dump(*TRI););
1141  }
1142  } else {
1143  assert(P.LaneMask.any());
1144  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1145  // This may be called before CurrentBottom has been initialized. However,
1146  // BotRPTracker must have a valid position. We want the value live into the
1147  // instruction or live out of the block, so ask for the previous
1148  // instruction's live-out.
1149  const LiveInterval &LI = LIS->getInterval(Reg);
1150  VNInfo *VNI;
1153  if (I == BB->end())
1154  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1155  else {
1157  VNI = LRQ.valueIn();
1158  }
1159  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1160  assert(VNI && "No live value at use.");
1161  for (const VReg2SUnit &V2SU
1163  SUnit *SU = V2SU.SU;
1164  // If this use comes before the reaching def, it cannot be a last use,
1165  // so decrease its pressure change.
1166  if (!SU->isScheduled && SU != &ExitSU) {
1167  LiveQueryResult LRQ =
1168  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1169  if (LRQ.valueIn() == VNI) {
1170  PressureDiff &PDiff = getPressureDiff(SU);
1171  PDiff.addPressureChange(Reg, true, &MRI);
1172  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1173  << *SU->getInstr();
1174  dbgs() << " to "; PDiff.dump(*TRI););
1175  }
1176  }
1177  }
1178  }
1179  }
1180 }
1181 
1183 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1184  if (EntrySU.getInstr() != nullptr)
1186  for (const SUnit &SU : SUnits) {
1187  dumpNodeAll(SU);
1188  if (ShouldTrackPressure) {
1189  dbgs() << " Pressure Diff : ";
1190  getPressureDiff(&SU).dump(*TRI);
1191  }
1192  dbgs() << " Single Issue : ";
1193  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1194  SchedModel.mustEndGroup(SU.getInstr()))
1195  dbgs() << "true;";
1196  else
1197  dbgs() << "false;";
1198  dbgs() << '\n';
1199  }
1200  if (ExitSU.getInstr() != nullptr)
1202 #endif
1203 }
1204 
1205 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1206 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1207 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1208 ///
1209 /// This is a skeletal driver, with all the functionality pushed into helpers,
1210 /// so that it can be easily extended by experimental schedulers. Generally,
1211 /// implementing MachineSchedStrategy should be sufficient to implement a new
1212 /// scheduling algorithm. However, if a scheduler further subclasses
1213 /// ScheduleDAGMILive then it will want to override this virtual method in order
1214 /// to update any specialized state.
1216  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1217  LLVM_DEBUG(SchedImpl->dumpPolicy());
1219 
1220  postprocessDAG();
1221 
1222  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1223  findRootsAndBiasEdges(TopRoots, BotRoots);
1224 
1225  // Initialize the strategy before modifying the DAG.
1226  // This may initialize a DFSResult to be used for queue priority.
1227  SchedImpl->initialize(this);
1228 
1229  LLVM_DEBUG(dump());
1230  if (PrintDAGs) dump();
1231  if (ViewMISchedDAGs) viewGraph();
1232 
1233  // Initialize ready queues now that the DAG and priority data are finalized.
1234  initQueues(TopRoots, BotRoots);
1235 
1236  bool IsTopNode = false;
1237  while (true) {
1238  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1239  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1240  if (!SU) break;
1241 
1242  assert(!SU->isScheduled && "Node already scheduled");
1243  if (!checkSchedLimit())
1244  break;
1245 
1246  scheduleMI(SU, IsTopNode);
1247 
1248  if (DFSResult) {
1249  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1250  if (!ScheduledTrees.test(SubtreeID)) {
1251  ScheduledTrees.set(SubtreeID);
1252  DFSResult->scheduleTree(SubtreeID);
1253  SchedImpl->scheduleTree(SubtreeID);
1254  }
1255  }
1256 
1257  // Notify the scheduling strategy after updating the DAG.
1258  SchedImpl->schedNode(SU, IsTopNode);
1259 
1260  updateQueues(SU, IsTopNode);
1261  }
1262  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1263 
1264  placeDebugValues();
1265 
1266  LLVM_DEBUG({
1267  dbgs() << "*** Final schedule for "
1268  << printMBBReference(*begin()->getParent()) << " ***\n";
1269  dumpSchedule();
1270  dbgs() << '\n';
1271  });
1272 }
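
As the comment above this function notes, implementing MachineSchedStrategy is normally sufficient to define a new scheduling algorithm: the driver only asks the strategy which ready node to issue next and notifies it once the node has been placed. A minimal sketch of such a strategy follows; NaiveBottomUpStrategy is a hypothetical name, and a realistic strategy would also model cycles, latency, and register pressure rather than issuing in release order.

class NaiveBottomUpStrategy : public MachineSchedStrategy {
  std::vector<SUnit *> ReadyQ; // Nodes whose successors are all scheduled.

public:
  void initialize(ScheduleDAGMI *DAG) override { ReadyQ.clear(); }

  SUnit *pickNode(bool &IsTopNode) override {
    if (ReadyQ.empty())
      return nullptr;    // Signals the driver that the region is done.
    IsTopNode = false;   // Always schedule from the bottom of the region.
    SUnit *SU = ReadyQ.back();
    ReadyQ.pop_back();
    return SU;
  }

  void schedNode(SUnit *SU, bool IsTopNode) override {}
  void releaseTopNode(SUnit *SU) override {}
  void releaseBottomNode(SUnit *SU) override { ReadyQ.push_back(SU); }
};
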
1273 
1274 /// Build the DAG and setup three register pressure trackers.
1276  if (!ShouldTrackPressure) {
1277  RPTracker.reset();
1278  RegionCriticalPSets.clear();
1280  return;
1281  }
1282 
1283  // Initialize the register pressure tracker used by buildSchedGraph.
1285  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1286 
1287  // Account for liveness generated by the region boundary.
1288  if (LiveRegionEnd != RegionEnd)
1289  RPTracker.recede();
1290 
1291  // Build the DAG, and compute current register pressure.
1293 
1294  // Initialize top/bottom trackers after computing region pressure.
1295  initRegPressure();
1296 }
1297 
1299  if (!DFSResult)
1300  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1301  DFSResult->clear();
1303  DFSResult->resize(SUnits.size());
1306 }
1307 
1308 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1309 /// only provides the critical path for single block loops. To handle loops that
1310 /// span blocks, we could use the vreg path latencies provided by
1311 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1312 /// available for use in the scheduler.
1313 ///
1314 /// The cyclic path estimation identifies a def-use pair that crosses the back
1315 /// edge and considers the depth and height of the nodes. For example, consider
1316 /// the following instruction sequence where each instruction has unit latency
1317 /// and defines an eponymous virtual register:
1318 ///
1319 /// a->b(a,c)->c(b)->d(c)->exit
1320 ///
1321 /// The cyclic critical path is two cycles: b->c->b
1322 /// The acyclic critical path is four cycles: a->b->c->d->exit
1323 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1324 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1325 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1326 /// LiveInDepth = depth(b) = len(a->b) = 1
1327 ///
1328 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1329 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1330 /// CyclicCriticalPath = min(2, 2) = 2
1331 ///
1332 /// This could be relevant to PostRA scheduling, but is currently implemented
1333 /// assuming LiveIntervals.
1335  // This only applies to single-block loops.
1336  if (!BB->isSuccessor(BB))
1337  return 0;
1338 
1339  unsigned MaxCyclicLatency = 0;
1340  // Visit each live out vreg def to find def/use pairs that cross iterations.
1342  Register Reg = P.RegUnit;
1344  continue;
1345  const LiveInterval &LI = LIS->getInterval(Reg);
1346  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1347  if (!DefVNI)
1348  continue;
1349 
1351  const SUnit *DefSU = getSUnit(DefMI);
1352  if (!DefSU)
1353  continue;
1354 
1355  unsigned LiveOutHeight = DefSU->getHeight();
1356  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1357  // Visit all local users of the vreg def.
1358  for (const VReg2SUnit &V2SU
1360  SUnit *SU = V2SU.SU;
1361  if (SU == &ExitSU)
1362  continue;
1363 
1364  // Only consider uses of the phi.
1366  if (!LRQ.valueIn()->isPHIDef())
1367  continue;
1368 
1369  // Assume that a path spanning two iterations is a cycle, which could
1370  // overestimate in strange cases. This allows cyclic latency to be
1371  // estimated as the minimum slack of the vreg's depth or height.
1372  unsigned CyclicLatency = 0;
1373  if (LiveOutDepth > SU->getDepth())
1374  CyclicLatency = LiveOutDepth - SU->getDepth();
1375 
1376  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1377  if (LiveInHeight > LiveOutHeight) {
1378  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1379  CyclicLatency = LiveInHeight - LiveOutHeight;
1380  } else
1381  CyclicLatency = 0;
1382 
1383  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1384  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1385  if (CyclicLatency > MaxCyclicLatency)
1386  MaxCyclicLatency = CyclicLatency;
1387  }
1388  }
1389  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1390  return MaxCyclicLatency;
1391 }
1392 
1393 /// Release ExitSU predecessors and setup scheduler queues. Re-position
1394 /// the Top RP tracker in case the region beginning has changed.
1396  ArrayRef<SUnit*> BotRoots) {
1397  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1398  if (ShouldTrackPressure) {
1399  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1401  }
1402 }
1403 
1404 /// Move an instruction and update register pressure.
1405 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1406  // Move the instruction to its new location in the instruction stream.
1407  MachineInstr *MI = SU->getInstr();
1408 
1409  if (IsTopNode) {
1410  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1411  if (&*CurrentTop == MI)
1413  else {
1416  }
1417 
1418  if (ShouldTrackPressure) {
1419  // Update top scheduled pressure.
1420  RegisterOperands RegOpers;
1421  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1422  if (ShouldTrackLaneMasks) {
1423  // Adjust liveness and add missing dead+read-undef flags.
1424  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1425  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1426  } else {
1427  // Adjust for missing dead-def flags.
1428  RegOpers.detectDeadDefs(*MI, *LIS);
1429  }
1430 
1431  TopRPTracker.advance(RegOpers);
1432  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1433  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1435 
1437  }
1438  } else {
1439  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1440  MachineBasicBlock::iterator priorII =
1442  if (&*priorII == MI)
1443  CurrentBottom = priorII;
1444  else {
1445  if (&*CurrentTop == MI) {
1446  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1448  }
1450  CurrentBottom = MI;
1452  }
1453  if (ShouldTrackPressure) {
1454  RegisterOperands RegOpers;
1455  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1456  if (ShouldTrackLaneMasks) {
1457  // Adjust liveness and add missing dead+read-undef flags.
1458  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1459  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1460  } else {
1461  // Adjust for missing dead-def flags.
1462  RegOpers.detectDeadDefs(*MI, *LIS);
1463  }
1464 
1468  BotRPTracker.recede(RegOpers, &LiveUses);
1469  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1470  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1472 
1474  updatePressureDiffs(LiveUses);
1475  }
1476  }
1477 }
1478 
1479 //===----------------------------------------------------------------------===//
1480 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1481 //===----------------------------------------------------------------------===//
1482 
1483 namespace {
1484 
1485 /// Post-process the DAG to create cluster edges between neighboring
1486 /// loads or between neighboring stores.
1487 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1488  struct MemOpInfo {
1489  SUnit *SU;
1491  int64_t Offset;
1492  unsigned Width;
1493 
1494  MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1495  int64_t Offset, unsigned Width)
1496  : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1497  Width(Width) {}
1498 
1499  static bool Compare(const MachineOperand *const &A,
1500  const MachineOperand *const &B) {
1501  if (A->getType() != B->getType())
1502  return A->getType() < B->getType();
1503  if (A->isReg())
1504  return A->getReg() < B->getReg();
1505  if (A->isFI()) {
1506  const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1507  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1508  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1510  return StackGrowsDown ? A->getIndex() > B->getIndex()
1511  : A->getIndex() < B->getIndex();
1512  }
1513 
1514  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1515  "index bases.");
1516  }
1517 
1518  bool operator<(const MemOpInfo &RHS) const {
1519  // FIXME: Don't compare everything twice. Maybe use C++20 three way
1520  // comparison instead when it's available.
1521  if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1522  RHS.BaseOps.begin(), RHS.BaseOps.end(),
1523  Compare))
1524  return true;
1525  if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1526  BaseOps.begin(), BaseOps.end(), Compare))
1527  return false;
1528  if (Offset != RHS.Offset)
1529  return Offset < RHS.Offset;
1530  return SU->NodeNum < RHS.SU->NodeNum;
1531  }
1532  };
1533 
1534  const TargetInstrInfo *TII;
1535  const TargetRegisterInfo *TRI;
1536  bool IsLoad;
1537 
1538 public:
1539  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1540  const TargetRegisterInfo *tri, bool IsLoad)
1541  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1542 
1543  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1544 
1545 protected:
1546  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1547  ScheduleDAGInstrs *DAG);
1548  void collectMemOpRecords(std::vector<SUnit> &SUnits,
1549  SmallVectorImpl<MemOpInfo> &MemOpRecords);
1550  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1552 };
1553 
1554 class StoreClusterMutation : public BaseMemOpClusterMutation {
1555 public:
1556  StoreClusterMutation(const TargetInstrInfo *tii,
1557  const TargetRegisterInfo *tri)
1558  : BaseMemOpClusterMutation(tii, tri, false) {}
1559 };
1560 
1561 class LoadClusterMutation : public BaseMemOpClusterMutation {
1562 public:
1563  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1564  : BaseMemOpClusterMutation(tii, tri, true) {}
1565 };
1566 
1567 } // end anonymous namespace
1568 
1569 namespace llvm {
1570 
1571 std::unique_ptr<ScheduleDAGMutation>
1573  const TargetRegisterInfo *TRI) {
1574  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1575  : nullptr;
1576 }
1577 
1578 std::unique_ptr<ScheduleDAGMutation>
1580  const TargetRegisterInfo *TRI) {
1581  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1582  : nullptr;
1583 }
1584 
1585 } // end namespace llvm
1586 
1587 // Sort all the loads/stores first. Then, for each load/store, check the
1588 // following ones one by one until reaching the first non-dependent one, and
1589 // call the target hook to see if they can be clustered.
1590 // If FastCluster is enabled, we assume that all the loads/stores have been
1591 // preprocessed and no longer have dependencies on each other.
1592 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1593  ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1594  ScheduleDAGInstrs *DAG) {
1595  // Keep track of the current cluster length and bytes for each SUnit.
1597 
1598  // At this point, `MemOpRecords` array must hold at least two mem ops. Try to
1599  // cluster mem ops collected within `MemOpRecords` array.
1600  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1601  // The decision to cluster mem ops is based on target-dependent logic.
1602  auto MemOpa = MemOpRecords[Idx];
1603 
1604  // Seek the next load/store to cluster with.
1605  unsigned NextIdx = Idx + 1;
1606  for (; NextIdx < End; ++NextIdx)
1607  // Skip if MemOpb has been clustered already or has dependency with
1608  // MemOpa.
1609  if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1610  (FastCluster ||
1611  (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1612  !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1613  break;
1614  if (NextIdx == End)
1615  continue;
1616 
1617  auto MemOpb = MemOpRecords[NextIdx];
1618  unsigned ClusterLength = 2;
1619  unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1620  if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1621  ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1622  CurrentClusterBytes =
1623  SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1624  }
1625 
1626  if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1627  CurrentClusterBytes))
1628  continue;
1629 
1630  SUnit *SUa = MemOpa.SU;
1631  SUnit *SUb = MemOpb.SU;
1632  if (SUa->NodeNum > SUb->NodeNum)
1633  std::swap(SUa, SUb);
1634 
1635  // FIXME: Is this check really required?
1636  if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1637  continue;
1638 
1639  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1640  << SUb->NodeNum << ")\n");
1641  ++NumClustered;
1642 
1643  if (IsLoad) {
1644  // Copy successor edges from SUa to SUb. Interleaving computation
1645  // dependent on SUa can prevent load combining due to register reuse.
1646  // Predecessor edges do not need to be copied from SUb to SUa since
1647  // nearby loads should have effectively the same inputs.
1648  for (const SDep &Succ : SUa->Succs) {
1649  if (Succ.getSUnit() == SUb)
1650  continue;
1651  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1652  << ")\n");
1653  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1654  }
1655  } else {
1656  // Copy predecessor edges from SUb to SUa to avoid the SUnits that SUb
1657  // depends on being scheduled in between SUb and SUa. Successor edges
1658  // do not need to be copied from SUa to SUb since no one will depend
1659  // on stores.
1660  // Note that we don't need to care about memory dependencies, as we
1661  // won't try to cluster mem ops that have any memory dependency.
1662  for (const SDep &Pred : SUb->Preds) {
1663  if (Pred.getSUnit() == SUa)
1664  continue;
1665  LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1666  << ")\n");
1667  DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1668  }
1669  }
1670 
1671  SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1672  CurrentClusterBytes};
1673 
1674  LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1675  << ", Curr cluster bytes: " << CurrentClusterBytes
1676  << "\n");
1677  }
1678 }
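
Whether two candidate mem ops are actually clustered is ultimately the target's call: TII->shouldClusterMemOps above receives the two base-operand lists plus the running cluster length and byte count. A sketch of a target override is shown below; MyTargetInstrInfo and the 16-byte limit are hypothetical, and the parameter roles follow the call site above.

bool MyTargetInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1,
    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
    unsigned NumBytes) const {
  // Only form pairs, and only when the combined access width stays small.
  return ClusterSize <= 2 && NumBytes <= 16;
}
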
1679 
1680 void BaseMemOpClusterMutation::collectMemOpRecords(
1681  std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1682  for (auto &SU : SUnits) {
1683  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1684  (!IsLoad && !SU.getInstr()->mayStore()))
1685  continue;
1686 
1687  const MachineInstr &MI = *SU.getInstr();
1689  int64_t Offset;
1690  bool OffsetIsScalable;
1691  unsigned Width;
1692  if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1693  OffsetIsScalable, Width, TRI)) {
1694  MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1695 
1696  LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1697  << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1698  << ", Width: " << Width << "\n");
1699  }
1700 #ifndef NDEBUG
1701  for (const auto *Op : BaseOps)
1702  assert(Op);
1703 #endif
1704  }
1705 }
1706 
1707 bool BaseMemOpClusterMutation::groupMemOps(
1710  bool FastCluster =
1711  ForceFastCluster ||
1712  MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1713 
1714  for (const auto &MemOp : MemOps) {
1715  unsigned ChainPredID = DAG->SUnits.size();
1716  if (FastCluster) {
1717  for (const SDep &Pred : MemOp.SU->Preds) {
1718  // We only want to cluster mem ops that have the same ctrl (non-data)
1719  // pred, so that they have no ctrl dependency on each other. But for
1720  // store instrs, we can still cluster them if the pred is a load instr.
1721  if ((Pred.isCtrl() &&
1722  (IsLoad ||
1723  (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1724  !Pred.isArtificial()) {
1725  ChainPredID = Pred.getSUnit()->NodeNum;
1726  break;
1727  }
1728  }
1729  } else
1730  ChainPredID = 0;
1731 
1732  Groups[ChainPredID].push_back(MemOp);
1733  }
1734  return FastCluster;
1735 }
1736 
1737 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1739  // Collect all the clusterable loads/stores
1740  SmallVector<MemOpInfo, 32> MemOpRecords;
1741  collectMemOpRecords(DAG->SUnits, MemOpRecords);
1742 
1743  if (MemOpRecords.size() < 2)
1744  return;
1745 
1746  // Put the loads/stores without dependencies into the same group using a
1747  // heuristic if the DAG is too complex, to avoid compile-time blowup.
1748  // Note that some fusion pairs could be lost with this.
1750  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1751 
1752  for (auto &Group : Groups) {
1753  // Sort the loads/stores so that we can stop clustering as early as
1754  // possible.
1755  llvm::sort(Group.second);
1756 
1757  // Try to cluster all the neighboring loads/stores.
1758  clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1759  }
1760 }
1761 
1762 //===----------------------------------------------------------------------===//
1763 // CopyConstrain - DAG post-processing to encourage copy elimination.
1764 //===----------------------------------------------------------------------===//
1765 
1766 namespace {
1767 
1768 /// Post-process the DAG to create weak edges from all uses of a copy to
1769 /// the one use that defines the copy's source vreg, most likely an induction
1770 /// variable increment.
1771 class CopyConstrain : public ScheduleDAGMutation {
1772  // Transient state.
1773  SlotIndex RegionBeginIdx;
1774 
1775  // RegionEndIdx is the slot index of the last non-debug instruction in the
1776  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1777  SlotIndex RegionEndIdx;
1778 
1779 public:
1780  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1781 
1782  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1783 
1784 protected:
1785  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1786 };
1787 
1788 } // end anonymous namespace
1789 
1790 namespace llvm {
1791 
1792 std::unique_ptr<ScheduleDAGMutation>
1793 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
1794  const TargetRegisterInfo *TRI) {
1795  return std::make_unique<CopyConstrain>(TII, TRI);
1796 }
1797 
1798 } // end namespace llvm
1799 
1800 /// constrainLocalCopy handles two possibilities:
1801 /// 1) Local src:
1802 /// I0: = dst
1803 /// I1: src = ...
1804 /// I2: = dst
1805 /// I3: dst = src (copy)
1806 /// (create pred->succ edges I0->I1, I2->I1)
1807 ///
1808 /// 2) Local copy:
1809 /// I0: dst = src (copy)
1810 /// I1: = dst
1811 /// I2: src = ...
1812 /// I3: = dst
1813 /// (create pred->succ edges I1->I2, I3->I2)
1814 ///
1815 /// Although the MachineScheduler is currently constrained to single blocks,
1816 /// this algorithm should handle extended blocks. An EBB is a set of
1817 /// contiguously numbered blocks such that the previous block in the EBB is
1818 /// always the single predecessor.
1819 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1820  LiveIntervals *LIS = DAG->getLIS();
1821  MachineInstr *Copy = CopySU->getInstr();
1822 
1823  // Check for pure vreg copies.
1824  const MachineOperand &SrcOp = Copy->getOperand(1);
1825  Register SrcReg = SrcOp.getReg();
1826  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1827  return;
1828 
1829  const MachineOperand &DstOp = Copy->getOperand(0);
1830  Register DstReg = DstOp.getReg();
1831  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1832  return;
1833 
1834  // Check if either the dest or source is local. If it's live across a back
1835  // edge, it's not local. Note that if both vregs are live across the back
1836  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1837  // If both the copy's source and dest are local live intervals, then we
1838  // should treat the dest as the global for the purpose of adding
1839  // constraints. This adds edges from source's other uses to the copy.
1840  unsigned LocalReg = SrcReg;
1841  unsigned GlobalReg = DstReg;
1842  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1843  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1844  LocalReg = DstReg;
1845  GlobalReg = SrcReg;
1846  LocalLI = &LIS->getInterval(LocalReg);
1847  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1848  return;
1849  }
1850  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1851 
1852  // Find the global segment after the start of the local LI.
1853  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1854  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1855  // local live range. We could create edges from other global uses to the local
1856  // start, but the coalescer should have already eliminated these cases, so
1857  // don't bother dealing with it.
1858  if (GlobalSegment == GlobalLI->end())
1859  return;
1860 
1861  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1862  // returned the next global segment. But if GlobalSegment overlaps with
1863  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1864  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1865  if (GlobalSegment->contains(LocalLI->beginIndex()))
1866  ++GlobalSegment;
1867 
1868  if (GlobalSegment == GlobalLI->end())
1869  return;
1870 
1871  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1872  if (GlobalSegment != GlobalLI->begin()) {
1873  // Two address defs have no hole.
1874  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1875  GlobalSegment->start)) {
1876  return;
1877  }
1878  // If the prior global segment may be defined by the same two-address
1879  // instruction that also defines LocalLI, then can't make a hole here.
1880  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1881  LocalLI->beginIndex())) {
1882  return;
1883  }
1884  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1885  // it would be a disconnected component in the live range.
1886  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1887  "Disconnected LRG within the scheduling region.");
1888  }
1889  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1890  if (!GlobalDef)
1891  return;
1892 
1893  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1894  if (!GlobalSU)
1895  return;
1896 
1897  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1898  // constraining the uses of the last local def to precede GlobalDef.
1899  SmallVector<SUnit*,8> LocalUses;
1900  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1901  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1902  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1903  for (const SDep &Succ : LastLocalSU->Succs) {
1904  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1905  continue;
1906  if (Succ.getSUnit() == GlobalSU)
1907  continue;
1908  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1909  return;
1910  LocalUses.push_back(Succ.getSUnit());
1911  }
1912  // Open the top of the GlobalLI hole by constraining any earlier global uses
1913  // to precede the start of LocalLI.
1914  SmallVector<SUnit*,8> GlobalUses;
1915  MachineInstr *FirstLocalDef =
1916  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1917  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1918  for (const SDep &Pred : GlobalSU->Preds) {
1919  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1920  continue;
1921  if (Pred.getSUnit() == FirstLocalSU)
1922  continue;
1923  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1924  return;
1925  GlobalUses.push_back(Pred.getSUnit());
1926  }
1927  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1928  // Add the weak edges.
1929  for (SUnit *LU : LocalUses) {
1930  LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
1931  << GlobalSU->NodeNum << ")\n");
1932  DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
1933  }
1934  for (SUnit *GU : GlobalUses) {
1935  LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
1936  << FirstLocalSU->NodeNum << ")\n");
1937  DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
1938  }
1939 }
1940 
1941 /// Callback from DAG postProcessing to create weak edges to encourage
1942 /// copy elimination.
1943 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1944  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1945  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1946 
1947  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1948  if (FirstPos == DAG->end())
1949  return;
1950  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1951  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1952  *priorNonDebug(DAG->end(), DAG->begin()));
1953 
1954  for (SUnit &SU : DAG->SUnits) {
1955  if (!SU.getInstr()->isCopy())
1956  continue;
1957 
1958  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1959  }
1960 }
1961 
1962 //===----------------------------------------------------------------------===//
1963 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1964 // and possibly other custom schedulers.
1965 //===----------------------------------------------------------------------===//
1966 
1967 static const unsigned InvalidCycle = ~0U;
1968 
1970 SchedBoundary::~SchedBoundary() { delete HazardRec; }
1971 /// Given a Count of resource usage and a Latency value, return true if a
1972 /// SchedBoundary becomes resource limited.
1973 /// If we are checking after scheduling a node, we should return true when
1974 /// we just reach the resource limit.
1975 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1976  unsigned Latency, bool AfterSchedNode) {
1977  int ResCntFactor = (int)(Count - (Latency * LFactor));
1978  if (AfterSchedNode)
1979  return ResCntFactor >= (int)LFactor;
1980  else
1981  return ResCntFactor > (int)LFactor;
1982 }
1983 
1984 void SchedBoundary::reset() {
1985  // A new HazardRec is created for each DAG and owned by SchedBoundary.
1986  // Destroying and reconstructing it is very expensive though. So keep
1987  // invalid, placeholder HazardRecs.
1988  if (HazardRec && HazardRec->isEnabled()) {
1989  delete HazardRec;
1990  HazardRec = nullptr;
1991  }
1992  Available.clear();
1993  Pending.clear();
1994  CheckPending = false;
1995  CurrCycle = 0;
1996  CurrMOps = 0;
1997  MinReadyCycle = std::numeric_limits<unsigned>::max();
1998  ExpectedLatency = 0;
1999  DependentLatency = 0;
2000  RetiredMOps = 0;
2001  MaxExecutedResCount = 0;
2002  ZoneCritResIdx = 0;
2003  IsResourceLimited = false;
2004  ReservedCycles.clear();
2005  ReservedCyclesIndex.clear();
2006  ResourceGroupSubUnitMasks.clear();
2007 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2008  // Track the maximum number of stall cycles that could arise either from the
2009  // latency of a DAG edge or the number of cycles that a processor resource is
2010  // reserved (SchedBoundary::ReservedCycles).
2011  MaxObservedStall = 0;
2012 #endif
2013  // Reserve a zero-count for invalid CritResIdx.
2014  ExecutedResCounts.resize(1);
2015  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2016 }
2017 
2018 void SchedRemainder::
2019 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2020  reset();
2021  if (!SchedModel->hasInstrSchedModel())
2022  return;
2023  RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
2024  for (SUnit &SU : DAG->SUnits) {
2025  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2026  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2027  * SchedModel->getMicroOpFactor();
2028  for (TargetSchedModel::ProcResIter
2029  PI = SchedModel->getWriteProcResBegin(SC),
2030  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2031  unsigned PIdx = PI->ProcResourceIdx;
2032  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2033  RemainingCounts[PIdx] += (Factor * PI->Cycles);
2034  }
2035  }
2036 }
2037 
2038 void SchedBoundary::
2039 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
2040  reset();
2041  DAG = dag;
2042  SchedModel = smodel;
2043  Rem = rem;
2044  if (SchedModel->hasInstrSchedModel()) {
2045  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2046  ReservedCyclesIndex.resize(ResourceCount);
2047  ExecutedResCounts.resize(ResourceCount);
2048  ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2049  unsigned NumUnits = 0;
2050 
2051  for (unsigned i = 0; i < ResourceCount; ++i) {
2052  ReservedCyclesIndex[i] = NumUnits;
2053  NumUnits += SchedModel->getProcResource(i)->NumUnits;
2054  if (isUnbufferedGroup(i)) {
2055  auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2056  for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2057  U != UE; ++U)
2058  ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2059  }
2060  }
2061 
2062  ReservedCycles.resize(NumUnits, InvalidCycle);
2063  }
2064 }
2065 
2066 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2067 /// these "soft stalls" differently than the hard stall cycles based on CPU
2068 /// resources and computed by checkHazard(). A fully in-order model
2069 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2070 /// available for scheduling until they are ready. However, a weaker in-order
2071 /// model may use this for heuristics. For example, if a processor has in-order
2072 /// behavior when reading certain resources, this may come into play.
2073 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
2074  if (!SU->isUnbuffered)
2075  return 0;
2076 
2077  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2078  if (ReadyCycle > CurrCycle)
2079  return ReadyCycle - CurrCycle;
2080  return 0;
2081 }
2082 
2083 /// Compute the next cycle at which the given processor resource unit
2084 /// can be scheduled.
2085 unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
2086  unsigned Cycles) {
2087  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2088  // If this resource has never been used, always return cycle zero.
2089  if (NextUnreserved == InvalidCycle)
2090  return 0;
2091  // For bottom-up scheduling add the cycles needed for the current operation.
2092  if (!isTop())
2093  NextUnreserved += Cycles;
2094  return NextUnreserved;
2095 }
2096 
2097 /// Compute the next cycle at which the given processor resource can be
2098 /// scheduled. Returns the next cycle and the index of the processor resource
2099 /// instance in the reserved cycles vector.
2100 std::pair<unsigned, unsigned>
2101 SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
2102  unsigned Cycles) {
2103 
2104  unsigned MinNextUnreserved = InvalidCycle;
2105  unsigned InstanceIdx = 0;
2106  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2107  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2108  assert(NumberOfInstances > 0 &&
2109  "Cannot have zero instances of a ProcResource");
2110 
2111  if (isUnbufferedGroup(PIdx)) {
2112  // If any subunits are used by the instruction, report that the resource
2113  // group is available at 0, effectively removing the group record from
2114  // hazarding and basing the hazarding decisions on the subunit records.
2115  // Otherwise, choose the first available instance from among the subunits.
2116  // Specifications which assign cycles to both the subunits and the group or
2117  // which use an unbuffered group with buffered subunits will appear to
2118  // schedule strangely. In the first case, the additional cycles for the
2119  // group will be ignored. In the second, the group will be ignored
2120  // entirely.
2121  for (const MCWriteProcResEntry &PE :
2122  make_range(SchedModel->getWriteProcResBegin(SC),
2123  SchedModel->getWriteProcResEnd(SC)))
2124  if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2125  return std::make_pair(0u, StartIndex);
2126 
2127  auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2128  for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2129  unsigned NextUnreserved, NextInstanceIdx;
2130  std::tie(NextUnreserved, NextInstanceIdx) =
2131  getNextResourceCycle(SC, SubUnits[I], Cycles);
2132  if (MinNextUnreserved > NextUnreserved) {
2133  InstanceIdx = NextInstanceIdx;
2134  MinNextUnreserved = NextUnreserved;
2135  }
2136  }
2137  return std::make_pair(MinNextUnreserved, InstanceIdx);
2138  }
2139 
2140  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2141  ++I) {
2142  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2143  if (MinNextUnreserved > NextUnreserved) {
2144  InstanceIdx = I;
2145  MinNextUnreserved = NextUnreserved;
2146  }
2147  }
2148  return std::make_pair(MinNextUnreserved, InstanceIdx);
2149 }
2150 
2151 /// Does this SU have a hazard within the current instruction group.
2152 ///
2153 /// The scheduler supports two modes of hazard recognition. The first is the
2154 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2155 /// supports highly complicated in-order reservation tables
2156 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2157 ///
2158 /// The second is a streamlined mechanism that checks for hazards based on
2159 /// simple counters that the scheduler itself maintains. It explicitly checks
2160 /// for instruction dispatch limitations, including the number of micro-ops that
2161 /// can dispatch per cycle.
2162 ///
2163 /// TODO: Also check whether the SU must start a new group.
2164 bool SchedBoundary::checkHazard(SUnit *SU) {
2165  if (HazardRec->isEnabled()
2166  && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
2167  return true;
2168  }
2169 
2170  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2171  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2172  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2173  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2174  return true;
2175  }
2176 
2177  if (CurrMOps > 0 &&
2178  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2179  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2180  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2181  << (isTop() ? "begin" : "end") << " group\n");
2182  return true;
2183  }
2184 
2185  if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
2186  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2187  for (const MCWriteProcResEntry &PE :
2188  make_range(SchedModel->getWriteProcResBegin(SC),
2189  SchedModel->getWriteProcResEnd(SC))) {
2190  unsigned ResIdx = PE.ProcResourceIdx;
2191  unsigned Cycles = PE.Cycles;
2192  unsigned NRCycle, InstanceIdx;
2193  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2194  if (NRCycle > CurrCycle) {
2195 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2196  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2197 #endif
2198  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2199  << SchedModel->getResourceName(ResIdx)
2200  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2201  << "=" << NRCycle << "c\n");
2202  return true;
2203  }
2204  }
2205  }
2206  return false;
2207 }
2208 
2209 // Find the unscheduled node in ReadySUs with the highest latency.
2210 unsigned SchedBoundary::
2211 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
2212  SUnit *LateSU = nullptr;
2213  unsigned RemLatency = 0;
2214  for (SUnit *SU : ReadySUs) {
2215  unsigned L = getUnscheduledLatency(SU);
2216  if (L > RemLatency) {
2217  RemLatency = L;
2218  LateSU = SU;
2219  }
2220  }
2221  if (LateSU) {
2222  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2223  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2224  }
2225  return RemLatency;
2226 }
2227 
2228 // Count resources in this zone and the remaining unscheduled
2229 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2230 // resource index, or zero if the zone is issue limited.
2231 unsigned SchedBoundary::
2232 getOtherResourceCount(unsigned &OtherCritIdx) {
2233  OtherCritIdx = 0;
2234  if (!SchedModel->hasInstrSchedModel())
2235  return 0;
2236 
2237  unsigned OtherCritCount = Rem->RemIssueCount
2238  + (RetiredMOps * SchedModel->getMicroOpFactor());
2239  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2240  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2241  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2242  PIdx != PEnd; ++PIdx) {
2243  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2244  if (OtherCount > OtherCritCount) {
2245  OtherCritCount = OtherCount;
2246  OtherCritIdx = PIdx;
2247  }
2248  }
2249  if (OtherCritIdx) {
2250  LLVM_DEBUG(
2251  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2252  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2253  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2254  }
2255  return OtherCritCount;
2256 }
2257 
2258 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2259  unsigned Idx) {
2260  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2261 
2262 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2263  // ReadyCycle was bumped up to the CurrCycle when this node was
2264  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2265  // scheduling, so may now be greater than ReadyCycle.
2266  if (ReadyCycle > CurrCycle)
2267  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2268 #endif
2269 
2270  if (ReadyCycle < MinReadyCycle)
2271  MinReadyCycle = ReadyCycle;
2272 
2273  // Check for interlocks first. For the purpose of other heuristics, an
2274  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2275  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2276  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2277  checkHazard(SU) || (Available.size() >= ReadyListLimit);
2278 
2279  if (!HazardDetected) {
2280  Available.push(SU);
2281 
2282  if (InPQueue)
2283  Pending.remove(Pending.begin() + Idx);
2284  return;
2285  }
2286 
2287  if (!InPQueue)
2288  Pending.push(SU);
2289 }
2290 
2291 /// Move the boundary of scheduled code by one cycle.
2292 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2293  if (SchedModel->getMicroOpBufferSize() == 0) {
2294  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2295  "MinReadyCycle uninitialized");
2296  if (MinReadyCycle > NextCycle)
2297  NextCycle = MinReadyCycle;
2298  }
2299  // Update the current micro-ops, which will issue in the next cycle.
2300  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2301  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2302 
2303  // Decrement DependentLatency based on the next cycle.
2304  if ((NextCycle - CurrCycle) > DependentLatency)
2305  DependentLatency = 0;
2306  else
2307  DependentLatency -= (NextCycle - CurrCycle);
2308 
2309  if (!HazardRec->isEnabled()) {
2310  // Bypass HazardRec virtual calls.
2311  CurrCycle = NextCycle;
2312  } else {
2313  // Bypass getHazardType calls in case of long latency.
2314  for (; CurrCycle != NextCycle; ++CurrCycle) {
2315  if (isTop())
2316  HazardRec->AdvanceCycle();
2317  else
2318  HazardRec->RecedeCycle();
2319  }
2320  }
2321  CheckPending = true;
2322  IsResourceLimited =
2323  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2324  getScheduledLatency(), true);
2325 
2326  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2327  << '\n');
2328 }
2329 
2330 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2331  ExecutedResCounts[PIdx] += Count;
2332  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2333  MaxExecutedResCount = ExecutedResCounts[PIdx];
2334 }
2335 
2336 /// Add the given processor resource to this scheduled zone.
2337 ///
2338 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2339 /// during which this resource is consumed.
2340 ///
2341 /// \return the next cycle at which the instruction may execute without
2342 /// oversubscribing resources.
2343 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2344  unsigned Cycles, unsigned NextCycle) {
2345  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2346  unsigned Count = Factor * Cycles;
2347  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2348  << Cycles << "x" << Factor << "u\n");
2349 
2350  // Update Executed resources counts.
2351  incExecutedResources(PIdx, Count);
2352  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2353  Rem->RemainingCounts[PIdx] -= Count;
2354 
2355  // Check if this resource exceeds the current critical resource. If so, it
2356  // becomes the critical resource.
2357  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2358  ZoneCritResIdx = PIdx;
2359  LLVM_DEBUG(dbgs() << " *** Critical resource "
2360  << SchedModel->getResourceName(PIdx) << ": "
2361  << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
2362  << "c\n");
2363  }
2364  // For reserved resources, record the highest cycle using the resource.
2365  unsigned NextAvailable, InstanceIdx;
2366  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2367  if (NextAvailable > CurrCycle) {
2368  LLVM_DEBUG(dbgs() << " Resource conflict: "
2369  << SchedModel->getResourceName(PIdx)
2370  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2371  << " reserved until @" << NextAvailable << "\n");
2372  }
2373  return NextAvailable;
2374 }
2375 
2376 /// Move the boundary of scheduled code by one SUnit.
2377 void SchedBoundary::bumpNode(SUnit *SU) {
2378  // Update the reservation table.
2379  if (HazardRec->isEnabled()) {
2380  if (!isTop() && SU->isCall) {
2381  // Calls are scheduled with their preceding instructions. For bottom-up
2382  // scheduling, clear the pipeline state before emitting.
2383  HazardRec->Reset();
2384  }
2385  HazardRec->EmitInstruction(SU);
2386  // Scheduling an instruction may have made pending instructions available.
2387  CheckPending = true;
2388  }
2389  // checkHazard should prevent scheduling multiple instructions per cycle that
2390  // exceed the issue width.
2391  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2392  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2393  assert(
2394  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2395  "Cannot schedule this instruction's MicroOps in the current cycle.");
2396 
2397  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2398  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2399 
2400  unsigned NextCycle = CurrCycle;
2401  switch (SchedModel->getMicroOpBufferSize()) {
2402  case 0:
2403  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2404  break;
2405  case 1:
2406  if (ReadyCycle > NextCycle) {
2407  NextCycle = ReadyCycle;
2408  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2409  }
2410  break;
2411  default:
2412  // We don't currently model the OOO reorder buffer, so consider all
2413  // scheduled MOps to be "retired". We do loosely model in-order resource
2414  // latency. If this instruction uses an in-order resource, account for any
2415  // likely stall cycles.
2416  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2417  NextCycle = ReadyCycle;
2418  break;
2419  }
2420  RetiredMOps += IncMOps;
2421 
2422  // Update resource counts and critical resource.
2423  if (SchedModel->hasInstrSchedModel()) {
2424  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2425  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2426  Rem->RemIssueCount -= DecRemIssue;
2427  if (ZoneCritResIdx) {
2428  // Scale scheduled micro-ops for comparing with the critical resource.
2429  unsigned ScaledMOps =
2430  RetiredMOps * SchedModel->getMicroOpFactor();
2431 
2432  // If scaled micro-ops are now more than the previous critical resource by
2433  // a full cycle, then micro-ops issue becomes critical.
2434  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2435  >= (int)SchedModel->getLatencyFactor()) {
2436  ZoneCritResIdx = 0;
2437  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2438  << ScaledMOps / SchedModel->getLatencyFactor()
2439  << "c\n");
2440  }
2441  }
2442  for (TargetSchedModel::ProcResIter
2443  PI = SchedModel->getWriteProcResBegin(SC),
2444  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2445  unsigned RCycle =
2446  countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2447  if (RCycle > NextCycle)
2448  NextCycle = RCycle;
2449  }
2450  if (SU->hasReservedResource) {
2451  // For reserved resources, record the highest cycle using the resource.
2452  // For top-down scheduling, this is the cycle in which we schedule this
2453  // instruction plus the number of cycles the operation reserves the
2454  // resource. For bottom-up it is simply the instruction's cycle.
2455  for (TargetSchedModel::ProcResIter
2456  PI = SchedModel->getWriteProcResBegin(SC),
2457  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2458  unsigned PIdx = PI->ProcResourceIdx;
2459  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2460  unsigned ReservedUntil, InstanceIdx;
2461  std::tie(ReservedUntil, InstanceIdx) =
2462  getNextResourceCycle(SC, PIdx, 0);
2463  if (isTop()) {
2464  ReservedCycles[InstanceIdx] =
2465  std::max(ReservedUntil, NextCycle + PI->Cycles);
2466  } else
2467  ReservedCycles[InstanceIdx] = NextCycle;
2468  }
2469  }
2470  }
2471  }
2472  // Update ExpectedLatency and DependentLatency.
2473  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2474  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2475  if (SU->getDepth() > TopLatency) {
2476  TopLatency = SU->getDepth();
2477  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2478  << SU->NodeNum << ") " << TopLatency << "c\n");
2479  }
2480  if (SU->getHeight() > BotLatency) {
2481  BotLatency = SU->getHeight();
2482  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2483  << SU->NodeNum << ") " << BotLatency << "c\n");
2484  }
2485  // If we stall for any reason, bump the cycle.
2486  if (NextCycle > CurrCycle)
2487  bumpCycle(NextCycle);
2488  else
2489  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2490  // resource limited. If a stall occurred, bumpCycle does this.
2491  IsResourceLimited =
2492  checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
2493  getScheduledLatency(), true);
2494 
2495  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
2496  // resets CurrMOps. Loop to handle instructions with more MOps than issue in
2497  // one cycle. Since we commonly reach the max MOps here, opportunistically
2498  // bump the cycle to avoid uselessly checking everything in the readyQ.
2499  CurrMOps += IncMOps;
2500 
2501  // Bump the cycle count for issue group constraints.
2502  // This must be done after NextCycle has been adjusted for all other stalls.
2503  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
2504  // currCycle to X.
2505  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2506  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2507  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2508  << " group\n");
2509  bumpCycle(++NextCycle);
2510  }
2511 
2512  while (CurrMOps >= SchedModel->getIssueWidth()) {
2513  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2514  << CurrCycle << '\n');
2515  bumpCycle(++NextCycle);
2516  }
2517  LLVM_DEBUG(dumpScheduledState());
2518 }
2519 
2520 /// Release pending ready nodes in to the available queue. This makes them
2521 /// visible to heuristics.
2522 void SchedBoundary::releasePending() {
2523  // If the available queue is empty, it is safe to reset MinReadyCycle.
2524  if (Available.empty())
2525  MinReadyCycle = std::numeric_limits<unsigned>::max();
2526 
2527  // Check to see if any of the pending instructions are ready to issue. If
2528  // so, add them to the available queue.
2529  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2530  SUnit *SU = *(Pending.begin() + I);
2531  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2532 
2533  if (ReadyCycle < MinReadyCycle)
2534  MinReadyCycle = ReadyCycle;
2535 
2536  if (Available.size() >= ReadyListLimit)
2537  break;
2538 
2539  releaseNode(SU, ReadyCycle, true, I);
2540  if (E != Pending.size()) {
2541  --I;
2542  --E;
2543  }
2544  }
2545  CheckPending = false;
2546 }
2547 
2548 /// Remove SU from the ready set for this boundary.
2549 void SchedBoundary::removeReady(SUnit *SU) {
2550  if (Available.isInQueue(SU))
2551  Available.remove(Available.find(SU));
2552  else {
2553  assert(Pending.isInQueue(SU) && "bad ready count");
2554  Pending.remove(Pending.find(SU));
2555  }
2556 }
2557 
2558 /// If this queue only has one ready candidate, return it. As a side effect,
2559 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2560 /// one node is ready. If multiple instructions are ready, return NULL.
2561 SUnit *SchedBoundary::pickOnlyChoice() {
2562  if (CheckPending)
2563  releasePending();
2564 
2565  // Defer any ready instrs that now have a hazard.
2566  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2567  if (checkHazard(*I)) {
2568  Pending.push(*I);
2569  I = Available.remove(I);
2570  continue;
2571  }
2572  ++I;
2573  }
2574  for (unsigned i = 0; Available.empty(); ++i) {
2575 // FIXME: Re-enable assert once PR20057 is resolved.
2576 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2577 // "permanent hazard");
2578  (void)i;
2579  bumpCycle(CurrCycle + 1);
2580  releasePending();
2581  }
2582 
2583  LLVM_DEBUG(Pending.dump());
2584  LLVM_DEBUG(Available.dump());
2585 
2586  if (Available.size() == 1)
2587  return *Available.begin();
2588  return nullptr;
2589 }
2590 
2591 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2592 // This is useful information to dump after bumpNode.
2593 // Note that the Queue contents are more useful before pickNodeFromQueue.
2594 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
2595  unsigned ResFactor;
2596  unsigned ResCount;
2597  if (ZoneCritResIdx) {
2598  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2599  ResCount = getResourceCount(ZoneCritResIdx);
2600  } else {
2601  ResFactor = SchedModel->getMicroOpFactor();
2602  ResCount = RetiredMOps * ResFactor;
2603  }
2604  unsigned LFactor = SchedModel->getLatencyFactor();
2605  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2606  << " Retired: " << RetiredMOps;
2607  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2608  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2609  << ResCount / ResFactor << " "
2610  << SchedModel->getResourceName(ZoneCritResIdx)
2611  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2612  << (IsResourceLimited ? " - Resource" : " - Latency")
2613  << " limited.\n";
2614 }
2615 #endif
2616 
2617 //===----------------------------------------------------------------------===//
2618 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2619 //===----------------------------------------------------------------------===//
2620 
2621 void GenericSchedulerBase::SchedCandidate::
2622 initResourceDelta(const ScheduleDAGMI *DAG,
2623  const TargetSchedModel *SchedModel) {
2624  if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
2625  return;
2626 
2627  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2628  for (TargetSchedModel::ProcResIter
2629  PI = SchedModel->getWriteProcResBegin(SC),
2630  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2631  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2632  ResDelta.CritResources += PI->Cycles;
2633  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2634  ResDelta.DemandedResources += PI->Cycles;
2635  }
2636 }
2637 
2638 /// Compute remaining latency. We need this both to determine whether the
2639 /// overall schedule has become latency-limited and whether the instructions
2640 /// outside this zone are resource or latency limited.
2641 ///
2642 /// The "dependent" latency is updated incrementally during scheduling as the
2643 /// max height/depth of scheduled nodes minus the cycles since it was
2644 /// scheduled:
2645 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2646 ///
2647 /// The "independent" latency is the max ready queue depth:
2648 /// ILat = max N.depth for N in Available|Pending
2649 ///
2650 /// RemainingLatency is the greater of independent and dependent latency.
2651 ///
2652 /// These computations are expensive, especially in DAGs with many edges, so
2653 /// only do them if necessary.
2654 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2655  unsigned RemLatency = CurrZone.getDependentLatency();
2656  RemLatency = std::max(RemLatency,
2657  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2658  RemLatency = std::max(RemLatency,
2659  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2660  return RemLatency;
2661 }
2662 
2663 /// Returns true if the current cycle plus remaining latency is greater than
2664 /// the critical path in the scheduling region.
2665 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2666  SchedBoundary &CurrZone,
2667  bool ComputeRemLatency,
2668  unsigned &RemLatency) const {
2669  // The current cycle is already greater than the critical path, so we are
2670  // already latency limited and don't need to compute the remaining latency.
2671  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2672  return true;
2673 
2674  // If we haven't scheduled anything yet, then we aren't latency limited.
2675  if (CurrZone.getCurrCycle() == 0)
2676  return false;
2677 
2678  if (ComputeRemLatency)
2679  RemLatency = computeRemLatency(CurrZone);
2680 
2681  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2682 }
2683 
2684 /// Set the CandPolicy given a scheduling zone given the current resources and
2685 /// latencies inside and outside the zone.
2686 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2687  SchedBoundary &CurrZone,
2688  SchedBoundary *OtherZone) {
2689  // Apply preemptive heuristics based on the total latency and resources
2690  // inside and outside this zone. Potential stalls should be considered before
2691  // following this policy.
2692 
2693  // Compute the critical resource outside the zone.
2694  unsigned OtherCritIdx = 0;
2695  unsigned OtherCount =
2696  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2697 
2698  bool OtherResLimited = false;
2699  unsigned RemLatency = 0;
2700  bool RemLatencyComputed = false;
2701  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2702  RemLatency = computeRemLatency(CurrZone);
2703  RemLatencyComputed = true;
2704  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2705  OtherCount, RemLatency, false);
2706  }
2707 
2708  // Schedule aggressively for latency in PostRA mode. We don't check for
2709  // acyclic latency during PostRA, and highly out-of-order processors will
2710  // skip PostRA scheduling.
2711  if (!OtherResLimited &&
2712  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2713  RemLatency))) {
2714  Policy.ReduceLatency |= true;
2715  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2716  << " RemainingLatency " << RemLatency << " + "
2717  << CurrZone.getCurrCycle() << "c > CritPath "
2718  << Rem.CriticalPath << "\n");
2719  }
2720  // If the same resource is limiting inside and outside the zone, do nothing.
2721  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2722  return;
2723 
2724  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2725  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2726  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2727  } if (OtherResLimited) dbgs()
2728  << " RemainingLimit: "
2729  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2730  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2731  << " Latency limited both directions.\n");
2732 
2733  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2734  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2735 
2736  if (OtherResLimited)
2737  Policy.DemandResIdx = OtherCritIdx;
2738 }
2739 
2740 #ifndef NDEBUG
2741 const char *GenericSchedulerBase::getReasonStr(
2742  GenericSchedulerBase::CandReason Reason) {
2743  switch (Reason) {
2744  case NoCand: return "NOCAND ";
2745  case Only1: return "ONLY1 ";
2746  case PhysReg: return "PHYS-REG ";
2747  case RegExcess: return "REG-EXCESS";
2748  case RegCritical: return "REG-CRIT ";
2749  case Stall: return "STALL ";
2750  case Cluster: return "CLUSTER ";
2751  case Weak: return "WEAK ";
2752  case RegMax: return "REG-MAX ";
2753  case ResourceReduce: return "RES-REDUCE";
2754  case ResourceDemand: return "RES-DEMAND";
2755  case TopDepthReduce: return "TOP-DEPTH ";
2756  case TopPathReduce: return "TOP-PATH ";
2757  case BotHeightReduce:return "BOT-HEIGHT";
2758  case BotPathReduce: return "BOT-PATH ";
2759  case NextDefUse: return "DEF-USE ";
2760  case NodeOrder: return "ORDER ";
2761  };
2762  llvm_unreachable("Unknown reason!");
2763 }
2764 
2765 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
2766  PressureChange P;
2767  unsigned ResIdx = 0;
2768  unsigned Latency = 0;
2769  switch (Cand.Reason) {
2770  default:
2771  break;
2772  case RegExcess:
2773  P = Cand.RPDelta.Excess;
2774  break;
2775  case RegCritical:
2776  P = Cand.RPDelta.CriticalMax;
2777  break;
2778  case RegMax:
2779  P = Cand.RPDelta.CurrentMax;
2780  break;
2781  case ResourceReduce:
2782  ResIdx = Cand.Policy.ReduceResIdx;
2783  break;
2784  case ResourceDemand:
2785  ResIdx = Cand.Policy.DemandResIdx;
2786  break;
2787  case TopDepthReduce:
2788  Latency = Cand.SU->getDepth();
2789  break;
2790  case TopPathReduce:
2791  Latency = Cand.SU->getHeight();
2792  break;
2793  case BotHeightReduce:
2794  Latency = Cand.SU->getHeight();
2795  break;
2796  case BotPathReduce:
2797  Latency = Cand.SU->getDepth();
2798  break;
2799  }
2800  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2801  if (P.isValid())
2802  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2803  << ":" << P.getUnitInc() << " ";
2804  else
2805  dbgs() << " ";
2806  if (ResIdx)
2807  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2808  else
2809  dbgs() << " ";
2810  if (Latency)
2811  dbgs() << " " << Latency << " cycles ";
2812  else
2813  dbgs() << " ";
2814  dbgs() << '\n';
2815 }
2816 #endif
2817 
2818 namespace llvm {
2819 /// Return true if this heuristic determines order.
2820 /// TODO: Consider refactor return type of these functions as integer or enum,
2821 /// as we may need to differentiate whether TryCand is better than Cand.
2822 bool tryLess(int TryVal, int CandVal,
2823  GenericSchedulerBase::SchedCandidate &TryCand,
2824  GenericSchedulerBase::SchedCandidate &Cand,
2825  GenericSchedulerBase::CandReason Reason) {
2826  if (TryVal < CandVal) {
2827  TryCand.Reason = Reason;
2828  return true;
2829  }
2830  if (TryVal > CandVal) {
2831  if (Cand.Reason > Reason)
2832  Cand.Reason = Reason;
2833  return true;
2834  }
2835  return false;
2836 }
2837 
2838 bool tryGreater(int TryVal, int CandVal,
2839  GenericSchedulerBase::SchedCandidate &TryCand,
2840  GenericSchedulerBase::SchedCandidate &Cand,
2841  GenericSchedulerBase::CandReason Reason) {
2842  if (TryVal > CandVal) {
2843  TryCand.Reason = Reason;
2844  return true;
2845  }
2846  if (TryVal < CandVal) {
2847  if (Cand.Reason > Reason)
2848  Cand.Reason = Reason;
2849  return true;
2850  }
2851  return false;
2852 }
2853 
2854 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
2855  GenericSchedulerBase::SchedCandidate &Cand,
2856  SchedBoundary &Zone) {
2857  if (Zone.isTop()) {
2858  // Prefer the candidate with the lesser depth, but only if one of them has
2859  // depth greater than the total latency scheduled so far, otherwise either
2860  // of them could be scheduled now with no stall.
2861  if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
2862  Zone.getScheduledLatency()) {
2863  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2864  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2865  return true;
2866  }
2867  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2868  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2869  return true;
2870  } else {
2871  // Prefer the candidate with the lesser height, but only if one of them has
2872  // height greater than the total latency scheduled so far, otherwise either
2873  // of them could be scheduled now with no stall.
2874  if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
2875  Zone.getScheduledLatency()) {
2876  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2877  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2878  return true;
2879  }
2880  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2881  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2882  return true;
2883  }
2884  return false;
2885 }
2886 } // end namespace llvm
2887 
2888 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2889  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2890  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2891 }
2892 
2893 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
2894  tracePick(Cand.Reason, Cand.AtTop);
2895 }
2896 
2897 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
2898  assert(dag->hasVRegLiveness() &&
2899  "(PreRA)GenericScheduler needs vreg liveness");
2900  DAG = static_cast<ScheduleDAGMILive*>(dag);
2901  SchedModel = DAG->getSchedModel();
2902  TRI = DAG->TRI;
2903 
2904  if (RegionPolicy.ComputeDFSResult)
2905  DAG->computeDFSResult();
2906 
2907  Rem.init(DAG, SchedModel);
2908  Top.init(DAG, SchedModel, &Rem);
2909  Bot.init(DAG, SchedModel, &Rem);
2910 
2911  // Initialize resource counts.
2912 
2913  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2914  // are disabled, then these HazardRecs will be disabled.
2915  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
2916  if (!Top.HazardRec) {
2917  Top.HazardRec =
2918  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2919  Itin, DAG);
2920  }
2921  if (!Bot.HazardRec) {
2922  Bot.HazardRec =
2923  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
2924  Itin, DAG);
2925  }
2926  TopCand.SU = nullptr;
2927  BotCand.SU = nullptr;
2928 }
2929 
2930 /// Initialize the per-region scheduling policy.
2931 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
2932  MachineBasicBlock::iterator End,
2933  unsigned NumRegionInstrs) {
2934  const MachineFunction &MF = *Begin->getMF();
2935  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2936 
2937  // Avoid setting up the register pressure tracker for small regions to save
2938  // compile time. As a rough heuristic, only track pressure when the number of
2939  // schedulable instructions exceeds half the integer register file.
2940  RegionPolicy.ShouldTrackPressure = true;
2941  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2942  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2943  if (TLI->isTypeLegal(LegalIntVT)) {
2944  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2945  TLI->getRegClassFor(LegalIntVT));
2946  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2947  }
2948  }
2949 
2950  // For generic targets, we default to bottom-up, because it's simpler and more
2951  // compile-time optimizations have been implemented in that direction.
2952  RegionPolicy.OnlyBottomUp = true;
2953 
2954  // Allow the subtarget to override default policy.
2955  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2956 
2957  // After subtarget overrides, apply command line options.
2958  if (!EnableRegPressure) {
2959  RegionPolicy.ShouldTrackPressure = false;
2960  RegionPolicy.ShouldTrackLaneMasks = false;
2961  }
2962 
2963  // Check -misched-topdown/bottomup can force or unforce scheduling direction.
2964  // e.g. -misched-bottomup=false allows scheduling in both directions.
2965  assert((!ForceTopDown || !ForceBottomUp) &&
2966  "-misched-topdown incompatible with -misched-bottomup");
2967  if (ForceBottomUp.getNumOccurrences() > 0) {
2968  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2969  if (RegionPolicy.OnlyBottomUp)
2970  RegionPolicy.OnlyTopDown = false;
2971  }
2972  if (ForceTopDown.getNumOccurrences() > 0) {
2973  RegionPolicy.OnlyTopDown = ForceTopDown;
2974  if (RegionPolicy.OnlyTopDown)
2975  RegionPolicy.OnlyBottomUp = false;
2976  }
2977 }
2978 
2979 void GenericScheduler::dumpPolicy() const {
2980  // Cannot completely remove virtual function even in release mode.
2981 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2982  dbgs() << "GenericScheduler RegionPolicy: "
2983  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2984  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2985  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2986  << "\n";
2987 #endif
2988 }
2989 
2990 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2991 /// critical path by more cycles than it takes to drain the instruction buffer.
2992 /// We estimate an upper bound on in-flight instructions as:
2993 ///
2994 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2995 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2996 /// InFlightResources = InFlightIterations * LoopResources
2997 ///
2998 /// TODO: Check execution resources in addition to IssueCount.
2999 void GenericScheduler::checkAcyclicLatency() {
3000  if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
3001  return;
3002 
3003  // Scaled number of cycles per loop iteration.
3004  unsigned IterCount =
3005  std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
3006  Rem.RemIssueCount);
3007  // Scaled acyclic critical path.
3008  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3009  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3010  unsigned InFlightCount =
3011  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3012  unsigned BufferLimit =
3013  SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
3014 
3015  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3016 
3017  LLVM_DEBUG(
3018  dbgs() << "IssueCycles="
3019  << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
3020  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3021  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3022  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3023  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3024  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3025 }
3026 
3027 void GenericScheduler::registerRoots() {
3028  Rem.CriticalPath = DAG->ExitSU.getDepth();
3029 
3030  // Some roots may not feed into ExitSU. Check all of them in case.
3031  for (const SUnit *SU : Bot.Available) {
3032  if (SU->getDepth() > Rem.CriticalPath)
3033  Rem.CriticalPath = SU->getDepth();
3034  }
3035  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3036  if (DumpCriticalPathLength) {
3037  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3038  }
3039 
3040  if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
3041  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3042  checkAcyclicLatency();
3043  }
3044 }
3045 
3046 namespace llvm {
3047 bool tryPressure(const PressureChange &TryP,
3048  const PressureChange &CandP,
3049  GenericSchedulerBase::SchedCandidate &TryCand,
3050  GenericSchedulerBase::SchedCandidate &Cand,
3051  GenericSchedulerBase::CandReason Reason,
3052  const TargetRegisterInfo *TRI,
3053  const MachineFunction &MF) {
3054  // If one candidate decreases and the other increases, go with it.
3055  // Invalid candidates have UnitInc==0.
3056  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3057  Reason)) {
3058  return true;
3059  }
3060  // Do not compare the magnitude of pressure changes between top and bottom
3061  // boundary.
3062  if (Cand.AtTop != TryCand.AtTop)
3063  return false;
3064 
3065  // If both candidates affect the same set in the same boundary, go with the
3066  // smallest increase.
3067  unsigned TryPSet = TryP.getPSetOrMax();
3068  unsigned CandPSet = CandP.getPSetOrMax();
3069  if (TryPSet == CandPSet) {
3070  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3071  Reason);
3072  }
3073 
3074  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3075  std::numeric_limits<int>::max();
3076 
3077  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3078  std::numeric_limits<int>::max();
3079 
3080  // If the candidates are decreasing pressure, reverse priority.
3081  if (TryP.getUnitInc() < 0)
3082  std::swap(TryRank, CandRank);
3083  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3084 }
3085 
3086 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3087  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3088 }
3089 
3090 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3091 /// their physreg def/use.
3092 ///
3093 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3094 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3095 /// with the operation that produces or consumes the physreg. We'll do this when
3096 /// regalloc has support for parallel copies.
3097 int biasPhysReg(const SUnit *SU, bool isTop) {
3098  const MachineInstr *MI = SU->getInstr();
3099 
3100  if (MI->isCopy()) {
3101  unsigned ScheduledOper = isTop ? 1 : 0;
3102  unsigned UnscheduledOper = isTop ? 0 : 1;
3103  // If we have already scheduled the physreg produce/consumer, immediately
3104  // schedule the copy.
3105  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
3106  return 1;
3107  // If the physreg is at the boundary, defer it. Otherwise schedule it
3108  // immediately to free the dependent. We can hoist the copy later.
3109  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3110  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
3111  return AtBoundary ? -1 : 1;
3112  }
3113 
3114  if (MI->isMoveImmediate()) {
3115  // If we have a move immediate and all successors have been assigned, bias
3116  // towards scheduling this later. Make sure all register defs are to
3117  // physical registers.
3118  bool DoBias = true;
3119  for (const MachineOperand &Op : MI->defs()) {
3120  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
3121  DoBias = false;
3122  break;
3123  }
3124  }
3125 
3126  if (DoBias)
3127  return isTop ? -1 : 1;
3128  }
3129 
3130  return 0;
3131 }
3132 } // end namespace llvm
3133 
3134 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
3135  bool AtTop,
3136  const RegPressureTracker &RPTracker,
3137  RegPressureTracker &TempTracker) {
3138  Cand.SU = SU;
3139  Cand.AtTop = AtTop;
3140  if (DAG->isTrackingPressure()) {
3141  if (AtTop) {
3142  TempTracker.getMaxDownwardPressureDelta(
3143  Cand.SU->getInstr(),
3144  Cand.RPDelta,
3145  DAG->getRegionCriticalPSets(),
3146  DAG->getRegPressure().MaxSetPressure);
3147  } else {
3148  if (VerifyScheduling) {
3149  TempTracker.getMaxUpwardPressureDelta(
3150  Cand.SU->getInstr(),
3151  &DAG->getPressureDiff(Cand.SU),
3152  Cand.RPDelta,
3153  DAG->getRegionCriticalPSets(),
3154  DAG->getRegPressure().MaxSetPressure);
3155  } else {
3156  RPTracker.getUpwardPressureDelta(
3157  Cand.SU->getInstr(),
3158  DAG->getPressureDiff(Cand.SU),
3159  Cand.RPDelta,
3160  DAG->getRegionCriticalPSets(),
3161  DAG->getRegPressure().MaxSetPressure);
3162  }
3163  }
3164  }
3165  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3166  << " Try SU(" << Cand.SU->NodeNum << ") "
3167  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3168  << Cand.RPDelta.Excess.getUnitInc() << "\n");
3169 }
3170 
3171 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3172 /// hierarchical. This may be more efficient than a graduated cost model because
3173 /// we don't need to evaluate all aspects of the model for each node in the
3174 /// queue. But it's really done to make the heuristics easier to debug and
3175 /// statistically analyze.
3176 ///
3177 /// \param Cand provides the policy and current best candidate.
3178 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3179 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3180 /// if Cand is from a different zone than TryCand.
3181 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3182 bool GenericScheduler::tryCandidate(SchedCandidate &Cand,
3183  SchedCandidate &TryCand,
3184  SchedBoundary *Zone) const {
3185  // Initialize the candidate if needed.
3186  if (!Cand.isValid()) {
3187  TryCand.Reason = NodeOrder;
3188  return true;
3189  }
3190 
3191  // Bias PhysReg Defs and copies to their uses and definitions, respectively.
3192  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3193  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3194  return TryCand.Reason != NoCand;
3195 
3196  // Avoid exceeding the target's limit.
3197  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3198  Cand.RPDelta.Excess,
3199  TryCand, Cand, RegExcess, TRI,
3200  DAG->MF))
3201  return TryCand.Reason != NoCand;
3202 
3203  // Avoid increasing the max critical pressure in the scheduled region.
3204  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3205  Cand.RPDelta.CriticalMax,
3206  TryCand, Cand, RegCritical, TRI,
3207  DAG->MF))
3208  return TryCand.Reason != NoCand;
3209 
3210  // We only compare a subset of features when comparing nodes between
3211  // Top and Bottom boundary. Some properties are simply incomparable, in many
3212  // other instances we should only override the other boundary if something
3213  // is a clear good pick on one boundary. Skip heuristics that are more
3214  // "tie-breaking" in nature.
3215  bool SameBoundary = Zone != nullptr;
3216  if (SameBoundary) {
3217  // For loops that are acyclic path limited, aggressively schedule for
3218  // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
3219  // heuristics to take precedence.
3220  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3221  tryLatency(TryCand, Cand, *Zone))
3222  return TryCand.Reason != NoCand;
3223 
3224  // Prioritize instructions that read unbuffered resources by stall cycles.
3225  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3226  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3227  return TryCand.Reason != NoCand;
3228  }
3229 
3230  // Keep clustered nodes together to encourage downstream peephole
3231  // optimizations which may reduce resource requirements.
3232  //
3233  // This is a best effort to set things up for a post-RA pass. Optimizations
3234  // like generating loads of multiple registers should ideally be done within
3235  // the scheduler pass by combining the loads during DAG postprocessing.
3236  const SUnit *CandNextClusterSU =
3237  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3238  const SUnit *TryCandNextClusterSU =
3239  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3240  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3241  Cand.SU == CandNextClusterSU,
3242  TryCand, Cand, Cluster))
3243  return TryCand.Reason != NoCand;
3244 
3245  if (SameBoundary) {
3246  // Weak edges are for clustering and other constraints.
3247  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3248  getWeakLeft(Cand.SU, Cand.AtTop),
3249  TryCand, Cand, Weak))
3250  return TryCand.Reason != NoCand;
3251  }
3252 
3253  // Avoid increasing the max pressure of the entire region.
3254  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3255  Cand.RPDelta.CurrentMax,
3256  TryCand, Cand, RegMax, TRI,
3257  DAG->MF))
3258  return TryCand.Reason != NoCand;
3259 
3260  if (SameBoundary) {
3261  // Avoid critical resource consumption and balance the schedule.
3262  TryCand.initResourceDelta(DAG, SchedModel);
3263  if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
3264  TryCand, Cand, ResourceReduce))
3265  return TryCand.Reason != NoCand;
3266  if (tryGreater(TryCand.ResDelta.DemandedResources,
3267  Cand.ResDelta.DemandedResources,
3268  TryCand, Cand, ResourceDemand))
3269  return TryCand.Reason != NoCand;
3270 
3271  // Avoid serializing long latency dependence chains.
3272  // For acyclic path limited loops, latency was already checked above.
3273  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3274  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3275  return TryCand.Reason != NoCand;
3276 
3277  // Fall through to original instruction order.
3278  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3279  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3280  TryCand.Reason = NodeOrder;
3281  return true;
3282  }
3283  }
3284 
3285  return false;
3286 }
3287 
3288 /// Pick the best candidate from the queue.
3289 ///
3290 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3291 /// DAG building. To adjust for the current scheduling location we need to
3292 /// maintain the number of vreg uses remaining to be top-scheduled.
3293 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
3294  const CandPolicy &ZonePolicy,
3295  const RegPressureTracker &RPTracker,
3296  SchedCandidate &Cand) {
3297  // getMaxPressureDelta temporarily modifies the tracker.
3298  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3299 
3300  ReadyQueue &Q = Zone.Available;
3301  for (SUnit *SU : Q) {
3302 
3303  SchedCandidate TryCand(ZonePolicy);
3304  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3305  // Pass SchedBoundary only when comparing nodes from the same boundary.
3306  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3307  if (tryCandidate(Cand, TryCand, ZoneArg)) {
3308  // Initialize resource delta if needed in case future heuristics query it.
3309  if (TryCand.ResDelta == SchedResourceDelta())
3310  TryCand.initResourceDelta(DAG, SchedModel);
3311  Cand.setBest(TryCand);
3312  LLVM_DEBUG(traceCandidate(Cand));
3313  }
3314  }
3315 }
3316 
3317 /// Pick the best candidate node from either the top or bottom queue.
3318 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3319  // Schedule as far as possible in the direction of no choice. This is most
3320  // efficient, but also provides the best heuristics for CriticalPSets.
3321  if (SUnit *SU = Bot.pickOnlyChoice()) {
3322  IsTopNode = false;
3323  tracePick(Only1, false);
3324  return SU;
3325  }
3326  if (SUnit *SU = Top.pickOnlyChoice()) {
3327  IsTopNode = true;
3328  tracePick(Only1, true);
3329  return SU;
3330  }
3331  // Set the bottom-up policy based on the state of the current bottom zone and
3332  // the instructions outside the zone, including the top zone.
3333  CandPolicy BotPolicy;
3334  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3335  // Set the top-down policy based on the state of the current top zone and
3336  // the instructions outside the zone, including the bottom zone.
3337  CandPolicy TopPolicy;
3338  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3339 
3340  // See if BotCand is still valid (because we previously scheduled from Top).
3341  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3342  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3343  BotCand.Policy != BotPolicy) {
3344  BotCand.reset(CandPolicy());
3345  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3346  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3347  } else {
3348  LLVM_DEBUG(traceCandidate(BotCand));
3349 #ifndef NDEBUG
3350  if (VerifyScheduling) {
3351  SchedCandidate TCand;
3352  TCand.reset(CandPolicy());
3353  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3354  assert(TCand.SU == BotCand.SU &&
3355  "Last pick result should correspond to re-picking right now");
3356  }
3357 #endif
3358  }
3359 
3360  // Check if the top Q has a better candidate.
3361  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3362  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3363  TopCand.Policy != TopPolicy) {
3364  TopCand.reset(CandPolicy());
3365  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3366  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3367  } else {
3368  LLVM_DEBUG(traceCandidate(TopCand));
3369 #ifndef NDEBUG
3370  if (VerifyScheduling) {
3371  SchedCandidate TCand;
3372  TCand.reset(CandPolicy());
3373  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3374  assert(TCand.SU == TopCand.SU &&
3375  "Last pick result should correspond to re-picking right now");
3376  }
3377 #endif
3378  }
3379 
3380  // Pick best from BotCand and TopCand.
3381  assert(BotCand.isValid());
3382  assert(TopCand.isValid());
3383  SchedCandidate Cand = BotCand;
3384  TopCand.Reason = NoCand;
3385  if (tryCandidate(Cand, TopCand, nullptr)) {
3386  Cand.setBest(TopCand);
3387  LLVM_DEBUG(traceCandidate(Cand));
3388  }
3389 
3390  IsTopNode = Cand.AtTop;
3391  tracePick(Cand);
3392  return Cand.SU;
3393 }
3394 
3395 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3396 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3397  if (DAG->top() == DAG->bottom()) {
3398  assert(Top.Available.empty() && Top.Pending.empty() &&
3399  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3400  return nullptr;
3401  }
3402  SUnit *SU;
3403  do {
3404  if (RegionPolicy.OnlyTopDown) {
3405  SU = Top.pickOnlyChoice();
3406  if (!SU) {
3407  CandPolicy NoPolicy;
3408  TopCand.reset(NoPolicy);
3409  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3410  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3411  tracePick(TopCand);
3412  SU = TopCand.SU;
3413  }
3414  IsTopNode = true;
3415  } else if (RegionPolicy.OnlyBottomUp) {
3416  SU = Bot.pickOnlyChoice();
3417  if (!SU) {
3418  CandPolicy NoPolicy;
3419  BotCand.reset(NoPolicy);
3420  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3421  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3422  tracePick(BotCand);
3423  SU = BotCand.SU;
3424  }
3425  IsTopNode = false;
3426  } else {
3427  SU = pickNodeBidirectional(IsTopNode);
3428  }
3429  } while (SU->isScheduled);
3430 
3431  if (SU->isTopReady())
3432  Top.removeReady(SU);
3433  if (SU->isBottomReady())
3434  Bot.removeReady(SU);
3435 
3436  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3437  << *SU->getInstr());
3438  return SU;
3439 }
3440 
3441 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3442  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3443  if (!isTop)
3444  ++InsertPos;
3445  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3446 
3447  // Find already scheduled copies with a single physreg dependence and move
3448  // them just above the scheduled instruction.
3449  for (SDep &Dep : Deps) {
3450  if (Dep.getKind() != SDep::Data ||
3451  !Register::isPhysicalRegister(Dep.getReg()))
3452  continue;
3453  SUnit *DepSU = Dep.getSUnit();
3454  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3455  continue;
3456  MachineInstr *Copy = DepSU->getInstr();
3457  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3458  continue;
3459  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3460  DAG->dumpNode(*Dep.getSUnit()));
3461  DAG->moveInstruction(Copy, InsertPos);
3462  }
3463 }
3464 
3465 /// Update the scheduler's state after scheduling a node. This is the same node
3466 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3467 /// update its state based on the current cycle before MachineSchedStrategy
3468 /// does.
3469 ///
3470 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3471 /// them here. See comments in biasPhysReg.
3472 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3473  if (IsTopNode) {
3474  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3475  Top.bumpNode(SU);
3476  if (SU->hasPhysRegUses)
3477  reschedulePhysReg(SU, true);
3478  } else {
3479  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3480  Bot.bumpNode(SU);
3481  if (SU->hasPhysRegDefs)
3482  reschedulePhysReg(SU, false);
3483  }
3484 }
3485 
3486 /// Create the standard converging machine scheduler. This will be used as the
3487 /// default scheduler if the target does not set a default.
3488 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3489  ScheduleDAGMILive *DAG =
3490  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3491  // Register DAG post-processors.
3492  //
3493  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3494  // data and pass it to later mutations. Have a single mutation that gathers
3495  // the interesting nodes in one pass.
3496  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3497  return DAG;
3498 }
3499 
3500 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
3501  return createGenericSchedLive(C);
3502 }
3503 
3504 static MachineSchedRegistry
3505 GenericSchedRegistry("converge", "Standard converging scheduler.",
3506  createConvergingSched);
3507 
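Experimental or out-of-tree strategies are exposed in the same way as the registry above; the sketch below uses made-up names (createMySched, "my-sched") to show the pattern. Once registered, the strategy can be requested by name, e.g. -misched=my-sched (through -mllvm when driving clang).

  // Illustrative sketch only; the factory name and registry key are hypothetical.
  static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
    return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
  }
  static MachineSchedRegistry
  MySchedRegistry("my-sched", "Example converging scheduler variant.",
                  createMySched);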
3508 //===----------------------------------------------------------------------===//
3509 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3510 //===----------------------------------------------------------------------===//
3511 
3512 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3513  DAG = Dag;
3514  SchedModel = DAG->getSchedModel();
3515  TRI = DAG->TRI;
3516 
3517  Rem.init(DAG, SchedModel);
3518  Top.init(DAG, SchedModel, &Rem);
3519  BotRoots.clear();
3520 
3521  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3522  // or are disabled, then these HazardRecs will be disabled.
3523  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3524  if (!Top.HazardRec) {
3525  Top.HazardRec =
3526  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3527  Itin, DAG);
3528  }
3529 }
3530 
3531 void PostGenericScheduler::registerRoots() {
3532  Rem.CriticalPath = DAG->ExitSU.getDepth();
3533 
3534  // Some roots may not feed into ExitSU. Check all of them in case.
3535  for (const SUnit *SU : BotRoots) {
3536  if (SU->getDepth() > Rem.CriticalPath)
3537  Rem.CriticalPath = SU->getDepth();
3538  }
3539  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3540  if (DumpCriticalPathLength) {
3541  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3542  }
3543 }
3544 
3545 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3546 ///
3547 /// \param Cand provides the policy and current best candidate.
3548 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3549 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3550 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3551  SchedCandidate &TryCand) {
3552  // Initialize the candidate if needed.
3553  if (!Cand.isValid()) {
3554  TryCand.Reason = NodeOrder;
3555  return true;
3556  }
3557 
3558  // Prioritize instructions that read unbuffered resources by stall cycles.
3559  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3560  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3561  return TryCand.Reason != NoCand;
3562 
3563  // Keep clustered nodes together.
3564  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3565  Cand.SU == DAG->getNextClusterSucc(),
3566  TryCand, Cand, Cluster))
3567  return TryCand.Reason != NoCand;
3568 
3569  // Avoid critical resource consumption and balance the schedule.
3570  if (tryLess(TryCand.ResDelta.CriticalResources, Cand.ResDelta.CriticalResources,
3571  TryCand, Cand, ResourceReduce))
3572  return TryCand.Reason != NoCand;
3573  if (tryGreater(TryCand.ResDelta.DemandedResources,
3574  Cand.ResDelta.DemandedResources,
3575  TryCand, Cand, ResourceDemand))
3576  return TryCand.Reason != NoCand;
3577 
3578  // Avoid serializing long latency dependence chains.
3579  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3580  return TryCand.Reason != NoCand;
3581  }
3582 
3583  // Fall through to original instruction order.
3584  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3585  TryCand.Reason = NodeOrder;
3586  return true;
3587  }
3588 
3589  return false;
3590 }
3591 
3592 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3593  ReadyQueue &Q = Top.Available;
3594  for (SUnit *SU : Q) {
3595  SchedCandidate TryCand(Cand.Policy);
3596  TryCand.SU = SU;
3597  TryCand.AtTop = true;
3598  TryCand.initResourceDelta(DAG, SchedModel);
3599  if (tryCandidate(Cand, TryCand)) {
3600  Cand.setBest(TryCand);
3601  LLVM_DEBUG(traceCandidate(Cand));
3602  }
3603  }
3604 }
3605 
3606 /// Pick the next node to schedule.
3607 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3608  if (DAG->top() == DAG->bottom()) {
3609  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3610  return nullptr;
3611  }
3612  SUnit *SU;
3613  do {
3614  SU = Top.pickOnlyChoice();
3615  if (SU) {
3616  tracePick(Only1, true);
3617  } else {
3618  CandPolicy NoPolicy;
3619  SchedCandidate TopCand(NoPolicy);
3620  // Set the top-down policy based on the state of the current top zone and
3621  // the instructions outside the zone, including the bottom zone.
3622  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3623  pickNodeFromQueue(TopCand);
3624  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3625  tracePick(TopCand);
3626  SU = TopCand.SU;
3627  }
3628  } while (SU->isScheduled);
3629 
3630  IsTopNode = true;
3631  Top.removeReady(SU);
3632 
3633  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3634  << *SU->getInstr());
3635  return SU;
3636 }
3637 
3638 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3639 /// scheduled/remaining flags in the DAG nodes.
3640 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3641  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3642  Top.bumpNode(SU);
3643 }
3644 
3645 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3646  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3647  /*RemoveKillFlags=*/true);
3648 }
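A target that wants different post-RA scheduling normally overrides TargetSubtargetInfo::createPostMachineScheduler() rather than modifying this factory. A minimal sketch, assuming a hypothetical MyTargetSubtarget class:

  // Hypothetical subtarget hook; mirrors createGenericSchedPostRA above.
  ScheduleDAGInstrs *
  MyTargetSubtarget::createPostMachineScheduler(MachineSchedContext *C) const {
    return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
                             /*RemoveKillFlags=*/true);
  }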
3649 
3650 //===----------------------------------------------------------------------===//
3651 // ILP Scheduler. Currently for experimental analysis of heuristics.
3652 //===----------------------------------------------------------------------===//
3653 
3654 namespace {
3655 
3656 /// Order nodes by the ILP metric.
3657 struct ILPOrder {
3658  const SchedDFSResult *DFSResult = nullptr;
3659  const BitVector *ScheduledTrees = nullptr;
3660  bool MaximizeILP;
3661 
3662  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3663 
3664  /// Apply a less-than relation on node priority.
3665  ///
3666  /// (Return true if A comes after B in the Q.)
3667  bool operator()(const SUnit *A, const SUnit *B) const {
3668  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3669  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3670  if (SchedTreeA != SchedTreeB) {
3671  // Unscheduled trees have lower priority.
3672  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3673  return ScheduledTrees->test(SchedTreeB);
3674 
3675  // Trees with shallower connections have lower priority.
3676  if (DFSResult->getSubtreeLevel(SchedTreeA)
3677  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3678  return DFSResult->getSubtreeLevel(SchedTreeA)
3679  < DFSResult->getSubtreeLevel(SchedTreeB);
3680  }
3681  }
3682  if (MaximizeILP)
3683  return DFSResult->getILP(A) < DFSResult->getILP(B);
3684  else
3685  return DFSResult->getILP(A) > DFSResult->getILP(B);
3686  }
3687 };
3688 
3689 /// Schedule based on the ILP metric.
3690 class ILPScheduler : public MachineSchedStrategy {
3691  ScheduleDAGMILive *DAG = nullptr;
3692  ILPOrder Cmp;
3693 
3694  std::vector<SUnit*> ReadyQ;
3695 
3696 public:
3697  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3698 
3699  void initialize(ScheduleDAGMI *dag) override {
3700  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3701  DAG = static_cast<ScheduleDAGMILive*>(dag);
3702  DAG->computeDFSResult();
3703  Cmp.DFSResult = DAG->getDFSResult();
3704  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3705  ReadyQ.clear();
3706  }
3707 
3708  void registerRoots() override {
3709  // Restore the heap in ReadyQ with the updated DFS results.
3710  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3711  }
3712 
3713  /// Implement MachineSchedStrategy interface.
3714  /// -----------------------------------------
3715 
3716  /// Callback to select the highest priority node from the ready Q.
3717  SUnit *pickNode(bool &IsTopNode) override {
3718  if (ReadyQ.empty()) return nullptr;
3719  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3720  SUnit *SU = ReadyQ.back();
3721  ReadyQ.pop_back();
3722  IsTopNode = false;
3723  LLVM_DEBUG(dbgs() << "Pick node "
3724  << "SU(" << SU->NodeNum << ") "
3725  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3726  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3727  << " @"
3728  << DAG->getDFSResult()->getSubtreeLevel(
3729  DAG->getDFSResult()->getSubtreeID(SU))
3730  << '\n'
3731  << "Scheduling " << *SU->getInstr());
3732  return SU;
3733  }
3734 
3735  /// Scheduler callback to notify that a new subtree is scheduled.
3736  void scheduleTree(unsigned SubtreeID) override {
3737  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3738  }
3739 
3740  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3741  /// DFSResults, and resort the priority Q.
3742  void schedNode(SUnit *SU, bool IsTopNode) override {
3743  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3744  }
3745 
3746  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3747 
3748  void releaseBottomNode(SUnit *SU) override {
3749  ReadyQ.push_back(SU);
3750  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3751  }
3752 };
3753 
3754 } // end anonymous namespace
3755 
3756 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3757  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3758 }
3759 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3760  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3761 }
3762 
3763 static MachineSchedRegistry ILPMaxRegistry(
3764  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3765 static MachineSchedRegistry ILPMinRegistry(
3766  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
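Because these strategies are registered with MachineSchedRegistry, they can be selected by name for analysis runs, for example -misched=ilpmax or -misched=ilpmin on an llc command line (or via -mllvm when invoking clang).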
3767 
3768 //===----------------------------------------------------------------------===//
3769 // Machine Instruction Shuffler for Correctness Testing
3770 //===----------------------------------------------------------------------===//
3771 
3772 #ifndef NDEBUG
3773 namespace {
3774 
3775 /// Apply a less-than relation on the node order, which corresponds to the
3776 /// instruction order prior to scheduling. IsReverse implements greater-than.
3777 template<bool IsReverse>
3778 struct SUnitOrder {
3779  bool operator()(SUnit *A, SUnit *B) const {
3780  if (IsReverse)
3781  return A->NodeNum > B->NodeNum;
3782  else
3783  return A->NodeNum < B->NodeNum;
3784  }
3785 };
3786 
3787 /// Reorder instructions as much as possible.
3788 class InstructionShuffler : public MachineSchedStrategy {
3789  bool IsAlternating;
3790  bool IsTopDown;
3791 
3792  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3793  // gives nodes with a higher number higher priority causing the latest
3794  // instructions to be scheduled first.
3795  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3796  TopQ;
3797 
3798  // When scheduling bottom-up, use greater-than as the queue priority.
3799  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3800  BottomQ;
3801 
3802 public:
3803  InstructionShuffler(bool alternate, bool topdown)
3804  : IsAlternating(alternate), IsTopDown(topdown) {}
3805 
3806  void initialize(ScheduleDAGMI*) override {
3807  TopQ.clear();
3808  BottomQ.clear();
3809  }
3810 
3811  /// Implement MachineSchedStrategy interface.
3812  /// -----------------------------------------
3813 
3814  SUnit *pickNode(bool &IsTopNode) override {
3815  SUnit *SU;
3816  if (IsTopDown) {
3817  do {
3818  if (TopQ.empty()) return nullptr;
3819  SU = TopQ.top();
3820  TopQ.pop();
3821  } while (SU->isScheduled);
3822  IsTopNode = true;
3823  } else {
3824  do {
3825  if (BottomQ.empty()) return nullptr;
3826  SU = BottomQ.top();
3827  BottomQ.pop();
3828  } while (SU->isScheduled);
3829  IsTopNode = false;
3830  }
3831  if (IsAlternating)
3832  IsTopDown = !IsTopDown;
3833  return SU;
3834  }
3835 
3836  void schedNode(SUnit *SU, bool IsTopNode) override {}
3837 
3838  void releaseTopNode(SUnit *SU) override {
3839  TopQ.push(SU);
3840  }
3841  void releaseBottomNode(SUnit *SU) override {
3842  BottomQ.push(SU);
3843  }
3844 };
3845 
3846 } // end anonymous namespace
3847 
3848 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3849  bool Alternate = !ForceTopDown && !ForceBottomUp;
3850  bool TopDown = !ForceBottomUp;
3851  assert((TopDown || !ForceTopDown) &&
3852  "-misched-topdown incompatible with -misched-bottomup");
3853  return new ScheduleDAGMILive(
3854  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3855 }
3856 
3857 static MachineSchedRegistry ShufflerRegistry(
3858  "shuffle", "Shuffle machine instructions alternating directions",
3859  createInstructionShuffler);
3860 #endif // !NDEBUG
3861 
3862 //===----------------------------------------------------------------------===//
3863 // GraphWriter support for ScheduleDAGMILive.
3864 //===----------------------------------------------------------------------===//
3865 
3866 #ifndef NDEBUG
3867 namespace llvm {
3868 
3869 template<> struct GraphTraits<
3870  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3871 
3872 template<>
3873 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3874 
3875  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3876  static std::string getGraphName(const ScheduleDAG *G) {
3877  return std::string(G->MF.getName());
3878  }
3879 
3880  static bool renderGraphFromBottomUp() {
3881  return true;
3882  }
3883 
3884  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
3885  if (ViewMISchedCutoff == 0)
3886  return false;
3887  return (Node->Preds.size() > ViewMISchedCutoff
3888  || Node->Succs.size() > ViewMISchedCutoff);
3889  }
3890 
3891  /// If you want to override the dot attributes printed for a particular
3892  /// edge, override this method.
3893  static std::string getEdgeAttributes(const SUnit *Node,
3894  SUnitIterator EI,
3895  const ScheduleDAG *Graph) {
3896  if (EI.isArtificialDep())
3897  return "color=cyan,style=dashed";
3898  if (EI.isCtrlDep())
3899  return "color=blue,style=dashed";
3900  return "";
3901  }
3902 
3903  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3904  std::string Str;
3905  raw_string_ostream SS(Str);
3906  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3907  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3908  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3909  SS << "SU:" << SU->NodeNum;
3910  if (DFS)
3911  SS << " I:" << DFS->getNumInstrs(SU);
3912  return SS.str();
3913  }
3914 
3915  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3916  return G->getGraphNodeLabel(SU);
3917  }
3918 
3919  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3920  std::string Str("shape=Mrecord");
3921  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3922  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3923  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3924  if (DFS) {
3925  Str += ",style=filled,fillcolor=\"#";
3926  Str += DOT::getColorString(DFS->getSubtreeID(N));
3927  Str += '"';
3928  }
3929  return Str;
3930  }
3931 };
3932 
3933 } // end namespace llvm
3934 #endif // NDEBUG
3935 
3936 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3937 /// rendered using 'dot'.
3938 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3939 #ifndef NDEBUG
3940  ViewGraph(this, Name, false, Title);
3941 #else
3942  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3943  << "systems with Graphviz or gv!\n";
3944 #endif // NDEBUG
3945 }
3946 
3947 /// Out-of-line implementation with no arguments is handy for gdb.
3948 void ScheduleDAGMI::viewGraph() {
3949  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3950 }
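In practice this view is reached either through the -view-misched-dags option in assertion-enabled builds or by calling viewGraph() manually from a debugger, which is what the argument-free overload above is intended for.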
llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1086
llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:250
llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350
ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
i
i
Definition: README.txt:29
llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:896
llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:120
llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:202
llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:848
llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:80
llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:816
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:815
DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:461
llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:912
llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:597
llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:160
llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:133
llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1784
llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:588
MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:48
llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:943
llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:581
ScheduleDAG.h
llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:896
llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:3948
llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1492
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:617
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:818
llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:627
llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1405
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
TargetFrameLowering.h
llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:263
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:43
llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:547
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:563
llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:165
llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3876
llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:816
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34
createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3759
ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:135
MachinePassRegistry.h
llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:735
llvm::MachineSchedStrategy::initialize
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:620
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:720
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:278
llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:375
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:166
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:93
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:328
llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:816
llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:143
ScheduleDAGInstrs.h
llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:140
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:628
llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:884
llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1056
llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2019
llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2101
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:143
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
Statistic.h
llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:625
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:117
llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:911
llvm::ForceTopDown
cl::opt< bool > ForceTopDown
llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:557
llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:225
llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:342
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:2838
llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:318
llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:876
llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3512
InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:1967
llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:816
llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
RegisterClassInfo.h
llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3182
MachineBasicBlock.h
llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:409
priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:298
llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3919
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1011
llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:703
llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
llvm::MemOp
Definition: TargetLowering.h:110
llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1199
llvm::ScheduleDAGMI::postprocessDAG
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:841
getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:489
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1835
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:357
tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:2888
DenseMap.h
llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:2686
llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3027
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:334
llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:206
TargetInstrInfo.h
llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:2897
Groups
static const X86InstrFMA3Group Groups[]
Definition: X86InstrFMA3Info.cpp:73
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::RegisterMaskPair
Definition: RegisterPressure.h:38
llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:734
llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:174
llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)
llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:152
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:870
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1336
llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2343
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:770
llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:460
EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1298
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:286
llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:817
llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:752
STLExtras.h
llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:343
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3134
llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:424
llvm::MachineSchedStrategy::releaseTopNode
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:126
nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:319
llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90
llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:638
llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:82
llvm::MachineSchedStrategy::schedNode
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:3893
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446
llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:722
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:281
llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:622
llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:666
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
RegisterPressure.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachineScheduler.cpp:74
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:220
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1395
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:117
MachineRegisterInfo.h
llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:570
llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:280
llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1508
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:420
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:2979
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:584
llvm::tgtok::Dag
@ Dag
Definition: TGLexer.h:50
llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1182
CommandLine.h
llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:656
llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:419
TargetLowering.h
llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3441
MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:145
llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:705
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:208
llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
llvm::X86AS::SS
@ SS
Definition: X86.h:201
llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleHazardRecognizer.h
llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:90
checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:1975
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1288
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:418
llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449
llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215
llvm::PrintDAGs
cl::opt< bool > PrintDAGs
llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:1969
llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:241
EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:744
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1163
llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:108
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:289
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:2854
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3446
llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:816
llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:590
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:924
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::MachineSchedStrategy::releaseBottomNode
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:847
SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1793
llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:817
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:391
llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:454
false
Definition: StackSlotColoring.cpp:141
llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:1984
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1114
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3645
llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:474
llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:825
llvm::Instruction
Definition: Instruction.h:42
ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:325
llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:710
createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3756
llvm::RegisterPressure::LiveOutRegs
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Definition: RegisterPressure.h:53
llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:359
llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:585
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:94
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:624
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:403
BitVector.h
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:319
llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:708
llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:155
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:239
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:152
llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:137
llvm::BitVector
Definition: BitVector.h:75
llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
llvm::PostGenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3550
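Targets commonly extend this hook; the sketch below is hedged: the class name MyPostRAStrategy and the height-based tie-breaker are illustrative assumptions, and the override defers to the generic heuristics before adding its own rule:
  struct MyPostRAStrategy : public PostGenericScheduler {
    MyPostRAStrategy(const MachineSchedContext *C) : PostGenericScheduler(C) {}
    bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override {
      if (PostGenericScheduler::tryCandidate(Cand, TryCand))
        return true;                            // generic rules already decided
      // Hypothetical extra tie-breaker: keep the longer remaining critical path.
      if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                     TryCand, Cand, BotHeightReduce))
        return TryCand.Reason != NoCand;
      return false;
    }
  };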
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:565
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:53
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2594
llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and set up scheduler queues.
Definition: MachineScheduler.cpp:866
llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:156
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes into the available queue.
Definition: MachineScheduler.cpp:2522
llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:117
llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:867
llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:415
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:2931
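initPolicy consults TargetSubtargetInfo::overrideSchedPolicy before applying any command-line direction overrides; a hedged sketch of a subtarget steering that policy follows, where MySubtarget and the region-size threshold of 16 are assumptions:
  void MySubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                        unsigned NumRegionInstrs) const {
    Policy.OnlyBottomUp = true;          // schedule this target bottom-up only
    Policy.OnlyTopDown = false;
    // Hypothetical: only pay for pressure tracking in larger regions.
    Policy.ShouldTrackPressure = NumRegionInstrs > 16;
  }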
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOperand flags.
Definition: RegisterPressure.cpp:570
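A hedged usage sketch; MI, TRI and MRI are assumed to be a MachineInstr, the TargetRegisterInfo and the MachineRegisterInfo already available in the caller:
  RegisterOperands RegOpers;
  RegOpers.collect(MI, TRI, MRI, /*TrackLaneMasks=*/false, /*IgnoreDead=*/false);
  for (const RegisterMaskPair &P : RegOpers.Uses)
    (void)P;   // each pair names a used register (or unit) and its lane mask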
llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current function.
Definition: RegisterClassInfo.h:94
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions.
Definition: PriorityQueue.h:28
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot' graphs.
Definition: DOTGraphTraits.h:166
llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has no successors.
Definition: ScheduleDAG.h:406
llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:394
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1538
llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)
llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:749
llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:914
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:319
llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:559
llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:2822
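A hedged fragment of how such a helper is typically used inside a tryCandidate-style comparison; Top, Cand and TryCand are assumed to be the SchedBoundary and the two SchedCandidates already in scope:
  // Prefer the candidate whose instruction stalls for fewer cycles at the
  // current top boundary; when the values differ, tryLess records the reason.
  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
              Top.getLatencyStallCycles(Cand.SU),
              TryCand, Cand, GenericSchedulerBase::Stall))
    return TryCand.Reason != GenericSchedulerBase::NoCand;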
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the traversed region.
Definition: RegisterPressure.h:463
llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:853
llvm::cl::opt< bool >
llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of pickNode().
Definition: MachineScheduler.h:863
llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:816
llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:419
llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:276
llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx, or NULL.
Definition: LiveInterval.h:429
TargetSchedule.h
llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:331
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:52
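A hedged one-line sketch of a subtarget (MySubtarget is assumed) opting into the post-RA machine scheduler:
  bool MySubtarget::enablePostRAMachineScheduler() const {
    // Run PostMachineScheduler after register allocation on this target.
    return true;
  }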
LiveIntervals.h
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:936
llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:828
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3488
llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:235
llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:621
llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineSchedStrategy::registerRoots
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.h:246
llvm::MachineSchedStrategy::scheduleTree
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
Definition: MachineScheduler.h:254
llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2039
llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:910
llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3047
llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:273
llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or ExitSU).
Definition: MachineScheduler.cpp:3531
llvm::DenseMap
Definition: DenseMap.h:714
llvm::operator<
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:339
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112
llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2073
ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instantiate a scheduler.
Definition: MachineScheduler.h:127
llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:319
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:311
llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:948
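A hedged sketch of a top-down pressure walk over a region; RPTracker is assumed to be a RegPressureTracker initialized at the region top, and RegionEnd the iterator bounding the region:
  while (RPTracker.getPos() != RegionEnd)
    RPTracker.advance();                 // cross one instruction going downward
  const std::vector<unsigned> &MaxPressure =
      RPTracker.getPressure().MaxSetPressure;  // peak pressure per pressure set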
llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:713
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124
llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2330
llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:198
llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:3874
ArrayRef.h
llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries, exclusively.
Definition: LiveInterval.h:518
llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
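A hedged sketch of the common liveness query pattern; LIS, Reg and MI are assumed to be the LiveIntervals analysis, a virtual register, and the instruction of interest:
  const LiveInterval &LI = LIS->getInterval(Reg);
  SlotIndex Idx = LIS->getInstructionIndex(MI).getRegSlot();
  if (VNInfo *VNI = LI.Query(Idx).valueIn())
    (void)VNI;   // Reg is live into MI; VNI identifies the reaching definition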
TargetPassConfig.h
llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:315
llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:582
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:458
llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:398
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:240
llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:325
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49
ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1344
llvm::RegisterPressure::LiveInRegs
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:52
llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:399
llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
Dump the scheduled sequence.
Definition: MachineScheduler.cpp:928
llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:29
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1195
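A hedged sketch of how a DAG mutation might use this check before adding an artificial edge; the mutation class and its choice of nodes are purely illustrative:
  struct MyOrderingMutation : public ScheduleDAGMutation {
    void apply(ScheduleDAGInstrs *DAG) override {
      if (DAG->SUnits.size() < 2)
        return;
      SUnit *First = &DAG->SUnits.front();
      SUnit *Last = &DAG->SUnits.back();
      // Only constrain Last after First if that cannot introduce a cycle.
      if (DAG->canAddEdge(Last, First))
        DAG->addEdge(Last, SDep(First, SDep::Artificial));
    }
  };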
llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:140
llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:277
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
llvm::Ge