LLVM  15.0.0git
MachineScheduler.cpp
1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // MachineScheduler schedules machine instructions after phi elimination. It
10 // preserves LiveIntervals so it can be invoked before register allocation.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/BitVector.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/PriorityQueue.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
50 #include "llvm/Config/llvm-config.h"
51 #include "llvm/InitializePasses.h"
52 #include "llvm/MC/LaneBitmask.h"
53 #include "llvm/Pass.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <iterator>
65 #include <limits>
66 #include <memory>
67 #include <string>
68 #include <tuple>
69 #include <utility>
70 #include <vector>
71 
72 using namespace llvm;
73 
74 #define DEBUG_TYPE "machine-scheduler"
75 
76 STATISTIC(NumClustered, "Number of load/store pairs clustered");
77 
78 namespace llvm {
79 
80 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
81  cl::desc("Force top-down list scheduling"));
82 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
83  cl::desc("Force bottom-up list scheduling"));
84 cl::opt<bool>
85 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
86  cl::desc("Print critical path length to stdout"));
87 
88 cl::opt<bool> VerifyScheduling(
89  "verify-misched", cl::Hidden,
90  cl::desc("Verify machine instrs before and after machine scheduling"));
91 
92 #ifndef NDEBUG
93 cl::opt<bool> ViewMISchedDAGs(
94  "view-misched-dags", cl::Hidden,
95  cl::desc("Pop up a window to show MISched dags after they are processed"));
96 #else
97 const bool ViewMISchedDAGs = false;
98 #endif // NDEBUG
99 
100 } // end namespace llvm
101 
102 #ifndef NDEBUG
103 /// In some situations a few uninteresting nodes depend on nearly all other
104 /// nodes in the graph; provide a cutoff to hide them.
105 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
106  cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
107 
108 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
109  cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
110 
111 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
112  cl::desc("Only schedule this function"));
113 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
114  cl::desc("Only schedule this MBB#"));
115 static cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
116  cl::desc("Print schedule DAGs"));
117 #else
118 static const bool PrintDAGs = false;
119 #endif // NDEBUG
120 
121 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
122 /// size of the ready lists.
123 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
124  cl::desc("Limit ready list to N instructions"), cl::init(256));
125 
126 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
127  cl::desc("Enable register pressure scheduling."), cl::init(true));
128 
129 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
130  cl::desc("Enable cyclic critical path analysis."), cl::init(true));
131 
132 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
133  cl::desc("Enable memop clustering."),
134  cl::init(true));
135 static cl::opt<bool>
136  ForceFastCluster("force-fast-cluster", cl::Hidden,
137  cl::desc("Switch to fast cluster algorithm with the loss "
138  "of some fusion opportunities"),
139  cl::init(false));
140 static cl::opt<unsigned>
141  FastClusterThreshold("fast-cluster-threshold", cl::Hidden,
142  cl::desc("The threshold for fast cluster"),
143  cl::init(1000));
144 
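// For illustration, the options above can be exercised from the llc command
// line (a sketch; these are hidden cl::opts, and the ones guarded by
// #ifndef NDEBUG are only available in asserts-enabled builds):
//
//   llc -O2 foo.ll -misched-topdown -misched-dcpl
//   llc -O2 foo.ll -misched-cluster=false -misched-regpressure=false
//   llc -O2 foo.ll -misched-cutoff=20 -misched-print-dags
//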
145 // DAG subtrees must have at least this many nodes.
146 static const unsigned MinSubtreeSize = 8;
147 
148 // Pin the vtables to this file.
149 void MachineSchedStrategy::anchor() {}
150 
151 void ScheduleDAGMutation::anchor() {}
152 
153 //===----------------------------------------------------------------------===//
154 // Machine Instruction Scheduling Pass and Registry
155 //===----------------------------------------------------------------------===//
156 
159 }
160 
162  delete RegClassInfo;
163 }
164 
165 namespace {
166 
167 /// Base class for a machine scheduler class that can run at any point.
168 class MachineSchedulerBase : public MachineSchedContext,
169  public MachineFunctionPass {
170 public:
171  MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
172 
173  void print(raw_ostream &O, const Module* = nullptr) const override;
174 
175 protected:
176  void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
177 };
178 
179 /// MachineScheduler runs after coalescing and before register allocation.
180 class MachineScheduler : public MachineSchedulerBase {
181 public:
182  MachineScheduler();
183 
184  void getAnalysisUsage(AnalysisUsage &AU) const override;
185 
186  bool runOnMachineFunction(MachineFunction&) override;
187 
188  static char ID; // Class identification, replacement for typeinfo
189 
190 protected:
191  ScheduleDAGInstrs *createMachineScheduler();
192 };
193 
194 /// PostMachineScheduler runs shortly before code emission.
195 class PostMachineScheduler : public MachineSchedulerBase {
196 public:
197  PostMachineScheduler();
198 
199  void getAnalysisUsage(AnalysisUsage &AU) const override;
200 
201  bool runOnMachineFunction(MachineFunction&) override;
202 
203  static char ID; // Class identification, replacement for typeinfo
204 
205 protected:
206  ScheduleDAGInstrs *createPostMachineScheduler();
207 };
208 
209 } // end anonymous namespace
210 
211 char MachineScheduler::ID = 0;
212 
214 
215 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
216  "Machine Instruction Scheduler", false, false)
222 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
224 
225 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
227 }
228 
229 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
230  AU.setPreservesCFG();
235  AU.addRequired<SlotIndexes>();
240 }
241 
242 char PostMachineScheduler::ID = 0;
243 
245 
246 INITIALIZE_PASS_BEGIN(PostMachineScheduler, "postmisched",
247  "PostRA Machine Instruction Scheduler", false, false)
251 INITIALIZE_PASS_END(PostMachineScheduler, "postmisched",
253 
254 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
256 }
257 
258 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
259  AU.setPreservesCFG();
265 }
266 
269 
270 /// A dummy default scheduler factory indicates whether the scheduler
271 /// is overridden on the command line.
272 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
273  return nullptr;
274 }
275 
276 /// MachineSchedOpt allows command line selection of the scheduler.
279 MachineSchedOpt("misched",
281  cl::desc("Machine instruction scheduler to use"));
282 
284 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
286 
287 cl::opt<bool> EnableMachineSched(
288  "enable-misched",
289  cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
290  cl::Hidden);
291 
292 cl::opt<bool> EnablePostRAMachineSched(
293  "enable-post-misched",
294  cl::desc("Enable the post-ra machine instruction scheduling pass."),
295  cl::init(true), cl::Hidden);
296 
297 /// Decrement this iterator until reaching the top or a non-debug instr.
301  assert(I != Beg && "reached the top of the region, cannot decrement");
302  while (--I != Beg) {
303  if (!I->isDebugOrPseudoInstr())
304  break;
305  }
306  return I;
307 }
308 
309 /// Non-const version.
315 }
316 
317 /// If this iterator is a debug value, increment until reaching the End or a
318 /// non-debug instruction.
322  for(; I != End; ++I) {
323  if (!I->isDebugOrPseudoInstr())
324  break;
325  }
326  return I;
327 }
328 
329 /// Non-const version.
335 }
336 
337 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
338 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
339  // Select the scheduler, or set the default.
341  if (Ctor != useDefaultMachineSched)
342  return Ctor(this);
343 
344  // Get the default scheduler set by the target for this function.
345  ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
346  if (Scheduler)
347  return Scheduler;
348 
349  // Default to GenericScheduler.
350  return createGenericSchedLive(this);
351 }
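// As a sketch of how the selection above is typically overridden (the names
// below are illustrative, not part of this file): a target can register its
// own factory with MachineSchedRegistry, making it selectable via -misched=...,
// or return a DAG from TargetPassConfig::createMachineScheduler():
//
//   static ScheduleDAGInstrs *createMyTargetSched(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
//   }
//   static MachineSchedRegistry
//   MySchedRegistry("mytarget-sched", "Custom scheduler for MyTarget",
//                   createMyTargetSched);
//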
352 
353 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
354 /// the caller. We don't have a command line option to override the postRA
355 /// scheduler. The Target must configure it.
356 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
357  // Get the postRA scheduler set by the target for this function.
358  ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
359  if (Scheduler)
360  return Scheduler;
361 
362  // Default to GenericScheduler.
363  return createGenericSchedPostRA(this);
364 }
365 
366 /// Top-level MachineScheduler pass driver.
367 ///
368 /// Visit blocks in function order. Divide each block into scheduling regions
369 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
370 /// consistent with the DAG builder, which traverses the interior of the
371 /// scheduling regions bottom-up.
372 ///
373 /// This design avoids exposing scheduling boundaries to the DAG builder,
374 /// simplifying the DAG builder's support for "special" target instructions.
375 /// At the same time the design allows target schedulers to operate across
376 /// scheduling boundaries, for example to bundle the boundary instructions
377 /// without reordering them. This creates complexity, because the target
378 /// scheduler must update the RegionBegin and RegionEnd positions cached by
379 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
380 /// design would be to split blocks at scheduling boundaries, but LLVM has a
381 /// general bias against block splitting purely for implementation simplicity.
382 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
383  if (skipFunction(mf.getFunction()))
384  return false;
385 
386  if (EnableMachineSched.getNumOccurrences()) {
387  if (!EnableMachineSched)
388  return false;
389  } else if (!mf.getSubtarget().enableMachineScheduler())
390  return false;
391 
392  LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
393 
394  // Initialize the context of the pass.
395  MF = &mf;
396  MLI = &getAnalysis<MachineLoopInfo>();
397  MDT = &getAnalysis<MachineDominatorTree>();
398  PassConfig = &getAnalysis<TargetPassConfig>();
399  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
400 
401  LIS = &getAnalysis<LiveIntervals>();
402 
403  if (VerifyScheduling) {
404  LLVM_DEBUG(LIS->dump());
405  MF->verify(this, "Before machine scheduling.");
406  }
407  RegClassInfo->runOnMachineFunction(*MF);
408 
409  // Instantiate the selected scheduler for this target, function, and
410  // optimization level.
411  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
412  scheduleRegions(*Scheduler, false);
413 
414  LLVM_DEBUG(LIS->dump());
415  if (VerifyScheduling)
416  MF->verify(this, "After machine scheduling.");
417  return true;
418 }
419 
420 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
421  if (skipFunction(mf.getFunction()))
422  return false;
423 
424  if (EnablePostRAMachineSched.getNumOccurrences()) {
425  if (!EnablePostRAMachineSched)
426  return false;
427  } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
428  LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
429  return false;
430  }
431  LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
432 
433  // Initialize the context of the pass.
434  MF = &mf;
435  MLI = &getAnalysis<MachineLoopInfo>();
436  PassConfig = &getAnalysis<TargetPassConfig>();
437  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
438 
439  if (VerifyScheduling)
440  MF->verify(this, "Before post machine scheduling.");
441 
442  // Instantiate the selected scheduler for this target, function, and
443  // optimization level.
444  std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
445  scheduleRegions(*Scheduler, true);
446 
447  if (VerifyScheduling)
448  MF->verify(this, "After post machine scheduling.");
449  return true;
450 }
451 
452 /// Return true if the given instruction should not be included in a scheduling
453 /// region.
454 ///
455 /// MachineScheduler does not currently support scheduling across calls. To
456 /// handle calls, the DAG builder needs to be modified to create register
457 /// anti/output dependencies on the registers clobbered by the call's regmask
458 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
459 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
460 /// the boundary, but there would be no benefit to postRA scheduling across
461 /// calls this late anyway.
464  MachineFunction *MF,
465  const TargetInstrInfo *TII) {
466  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
467 }
468 
469 /// A region of an MBB for scheduling.
470 namespace {
471 struct SchedRegion {
472  /// RegionBegin is the first instruction in the scheduling region, and
473  /// RegionEnd is either MBB->end() or the scheduling boundary after the
474  /// last instruction in the scheduling region. These iterators cannot refer
475  /// to instructions outside of the identified scheduling region because
476  /// those may be reordered before scheduling this region.
477  MachineBasicBlock::iterator RegionBegin;
478  MachineBasicBlock::iterator RegionEnd;
479  unsigned NumRegionInstrs;
480 
482  unsigned N) :
483  RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
484 };
485 } // end anonymous namespace
486 
488 
489 static void
491  MBBRegionsVector &Regions,
492  bool RegionsTopDown) {
493  MachineFunction *MF = MBB->getParent();
494  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
495 
496  MachineBasicBlock::iterator I = nullptr;
497  for(MachineBasicBlock::iterator RegionEnd = MBB->end();
498  RegionEnd != MBB->begin(); RegionEnd = I) {
499 
500  // Avoid decrementing RegionEnd for blocks with no terminator.
501  if (RegionEnd != MBB->end() ||
502  isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
503  --RegionEnd;
504  }
505 
506  // The next region starts above the previous region. Look backward in the
507  // instruction stream until we find the nearest boundary.
508  unsigned NumRegionInstrs = 0;
509  I = RegionEnd;
510  for (;I != MBB->begin(); --I) {
511  MachineInstr &MI = *std::prev(I);
512  if (isSchedBoundary(&MI, &*MBB, MF, TII))
513  break;
514  if (!MI.isDebugOrPseudoInstr()) {
515  // MBB::size() uses instr_iterator to count. Here we need a bundle to
516  // count as a single instruction.
517  ++NumRegionInstrs;
518  }
519  }
520 
521  // It's possible we found a scheduling region that only has debug
522  // instructions. Don't bother scheduling these.
523  if (NumRegionInstrs != 0)
524  Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
525  }
526 
527  if (RegionsTopDown)
528  std::reverse(Regions.begin(), Regions.end());
529 }
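// To illustrate the region splitting above with pseudo-MIR (not taken from
// this file): a block such as
//
//   %1 = LOAD %0
//   %2 = ADD %1, %1
//   CALL @f, %2
//   %3 = MUL %2, %2
//   STORE %3, %0
//
// yields two scheduling regions, one above and one below the call, because
// isSchedBoundary() treats calls as boundaries; the call itself is the
// RegionEnd of the upper region and is never reordered.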
530 
531 /// Main driver for both MachineScheduler and PostMachineScheduler.
532 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
533  bool FixKillFlags) {
534  // Visit all machine basic blocks.
535  //
536  // TODO: Visit blocks in global postorder or postorder within the bottom-up
537  // loop tree. Then we can optionally compute global RegPressure.
538  for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
539  MBB != MBBEnd; ++MBB) {
540 
541  Scheduler.startBlock(&*MBB);
542 
543 #ifndef NDEBUG
544  if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
545  continue;
546  if (SchedOnlyBlock.getNumOccurrences()
547  && (int)SchedOnlyBlock != MBB->getNumber())
548  continue;
549 #endif
550 
551  // Break the block into scheduling regions [I, RegionEnd). RegionEnd
552  // points to the scheduling boundary at the bottom of the region. The DAG
553  // does not include RegionEnd, but the region does (i.e. the next
554  // RegionEnd is above the previous RegionBegin). If the current block has
555  // no terminator then RegionEnd == MBB->end() for the bottom region.
556  //
557  // All the regions of MBB are first found and stored in MBBRegions, which
558  // is then processed top-down or bottom-up per doMBBSchedRegionsTopDown().
559  //
560  // The Scheduler may insert instructions during either schedule() or
561  // exitRegion(), even for empty regions. So the local iterators 'I' and
562  // 'RegionEnd' are invalid across these calls. Instructions must not be
563  // added to regions other than the current one without updating MBBRegions.
564 
565  MBBRegionsVector MBBRegions;
566  getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
567  for (const SchedRegion &R : MBBRegions) {
568  MachineBasicBlock::iterator I = R.RegionBegin;
569  MachineBasicBlock::iterator RegionEnd = R.RegionEnd;
570  unsigned NumRegionInstrs = R.NumRegionInstrs;
571 
572  // Notify the scheduler of the region, even if we may skip scheduling
573  // it. Perhaps it still needs to be bundled.
574  Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
575 
576  // Skip empty scheduling regions (0 or 1 schedulable instructions).
577  if (I == RegionEnd || I == std::prev(RegionEnd)) {
578  // Close the current region. Bundle the terminator if needed.
579  // This invalidates 'RegionEnd' and 'I'.
580  Scheduler.exitRegion();
581  continue;
582  }
583  LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
584  LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
585  << " " << MBB->getName() << "\n From: " << *I
586  << " To: ";
587  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
588  else dbgs() << "End\n";
589  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
590  if (DumpCriticalPathLength) {
591  errs() << MF->getName();
592  errs() << ":%bb. " << MBB->getNumber();
593  errs() << " " << MBB->getName() << " \n";
594  }
595 
596  // Schedule a region: possibly reorder instructions.
597  // This invalidates the original region iterators.
598  Scheduler.schedule();
599 
600  // Close the current region.
601  Scheduler.exitRegion();
602  }
603  Scheduler.finishBlock();
604  // FIXME: Ideally, no further passes should rely on kill flags. However,
605  // thumb2 size reduction is currently an exception, so the PostMIScheduler
606  // needs to do this.
607  if (FixKillFlags)
608  Scheduler.fixupKills(*MBB);
609  }
610  Scheduler.finalizeSchedule();
611 }
612 
613 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
614  // unimplemented
615 }
616 
617 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
619  dbgs() << "Queue " << Name << ": ";
620  for (const SUnit *SU : Queue)
621  dbgs() << SU->NodeNum << " ";
622  dbgs() << "\n";
623 }
624 #endif
625 
626 //===----------------------------------------------------------------------===//
627 // ScheduleDAGMI - Basic machine instruction scheduling. This is
628 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
629 // virtual registers.
630 //===----------------------------------------------------------------------===//
631 
632 // Provide a vtable anchor.
634 
635 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
636 /// NumPredsLeft reaches zero, release the successor node.
637 ///
638 /// FIXME: Adjust SuccSU height based on MinLatency.
639 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
640  SUnit *SuccSU = SuccEdge->getSUnit();
641 
642  if (SuccEdge->isWeak()) {
643  --SuccSU->WeakPredsLeft;
644  if (SuccEdge->isCluster())
645  NextClusterSucc = SuccSU;
646  return;
647  }
648 #ifndef NDEBUG
649  if (SuccSU->NumPredsLeft == 0) {
650  dbgs() << "*** Scheduling failed! ***\n";
651  dumpNode(*SuccSU);
652  dbgs() << " has been released too many times!\n";
653  llvm_unreachable(nullptr);
654  }
655 #endif
656  // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
657  // CurrCycle may have advanced since then.
658  if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
659  SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
660 
661  --SuccSU->NumPredsLeft;
662  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
663  SchedImpl->releaseTopNode(SuccSU);
664 }
665 
666 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
668  for (SDep &Succ : SU->Succs)
669  releaseSucc(SU, &Succ);
670 }
671 
672 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
673 /// NumSuccsLeft reaches zero, release the predecessor node.
674 ///
675 /// FIXME: Adjust PredSU height based on MinLatency.
676 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
677  SUnit *PredSU = PredEdge->getSUnit();
678 
679  if (PredEdge->isWeak()) {
680  --PredSU->WeakSuccsLeft;
681  if (PredEdge->isCluster())
682  NextClusterPred = PredSU;
683  return;
684  }
685 #ifndef NDEBUG
686  if (PredSU->NumSuccsLeft == 0) {
687  dbgs() << "*** Scheduling failed! ***\n";
688  dumpNode(*PredSU);
689  dbgs() << " has been released too many times!\n";
690  llvm_unreachable(nullptr);
691  }
692 #endif
693  // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
694  // CurrCycle may have advanced since then.
695  if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
696  PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
697 
698  --PredSU->NumSuccsLeft;
699  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
700  SchedImpl->releaseBottomNode(PredSU);
701 }
702 
703 /// releasePredecessors - Call releasePred on each of SU's predecessors.
705  for (SDep &Pred : SU->Preds)
706  releasePred(SU, &Pred);
707 }
708 
711  SchedImpl->enterMBB(bb);
712 }
713 
715  SchedImpl->leaveMBB();
717 }
718 
719 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
720 /// crossing a scheduling boundary. [begin, end) includes all instructions in
721 /// the region, including the boundary itself and single-instruction regions
722 /// that don't get scheduled.
726  unsigned regioninstrs)
727 {
728  ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
729 
730  SchedImpl->initPolicy(begin, end, regioninstrs);
731 }
732 
733 /// This is normally called from the main scheduler loop but may also be invoked
734 /// by the scheduling strategy to perform additional code motion.
737  // Advance RegionBegin if the first instruction moves down.
738  if (&*RegionBegin == MI)
739  ++RegionBegin;
740 
741  // Update the instruction stream.
742  BB->splice(InsertPos, BB, MI);
743 
744  // Update LiveIntervals
745  if (LIS)
746  LIS->handleMove(*MI, /*UpdateFlags=*/true);
747 
748  // Recede RegionBegin if an instruction moves above the first.
749  if (RegionBegin == InsertPos)
750  RegionBegin = MI;
751 }
752 
754 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
755  if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
757  return false;
758  }
759  ++NumInstrsScheduled;
760 #endif
761  return true;
762 }
763 
764 /// Per-region scheduling driver, called back from
765 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
766 /// does not consider liveness or register pressure. It is useful for PostRA
767 /// scheduling and potentially other custom schedulers.
769  LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
770  LLVM_DEBUG(SchedImpl->dumpPolicy());
771 
772  // Build the DAG.
774 
775  postprocessDAG();
776 
777  SmallVector<SUnit*, 8> TopRoots, BotRoots;
778  findRootsAndBiasEdges(TopRoots, BotRoots);
779 
780  LLVM_DEBUG(dump());
781  if (PrintDAGs) dump();
782  if (ViewMISchedDAGs) viewGraph();
783 
784  // Initialize the strategy before modifying the DAG.
785  // This may initialize a DFSResult to be used for queue priority.
786  SchedImpl->initialize(this);
787 
788  // Initialize ready queues now that the DAG and priority data are finalized.
789  initQueues(TopRoots, BotRoots);
790 
791  bool IsTopNode = false;
792  while (true) {
793  LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
794  SUnit *SU = SchedImpl->pickNode(IsTopNode);
795  if (!SU) break;
796 
797  assert(!SU->isScheduled && "Node already scheduled");
798  if (!checkSchedLimit())
799  break;
800 
801  MachineInstr *MI = SU->getInstr();
802  if (IsTopNode) {
803  assert(SU->isTopReady() && "node still has unscheduled dependencies");
804  if (&*CurrentTop == MI)
806  else
808  } else {
809  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
812  if (&*priorII == MI)
813  CurrentBottom = priorII;
814  else {
815  if (&*CurrentTop == MI)
816  CurrentTop = nextIfDebug(++CurrentTop, priorII);
818  CurrentBottom = MI;
819  }
820  }
821  // Notify the scheduling strategy before updating the DAG.
822  // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
823  // runs, it can then use the accurate ReadyCycle time to determine whether
824  // newly released nodes can move to the readyQ.
825  SchedImpl->schedNode(SU, IsTopNode);
826 
827  updateQueues(SU, IsTopNode);
828  }
829  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
830 
832 
833  LLVM_DEBUG({
834  dbgs() << "*** Final schedule for "
835  << printMBBReference(*begin()->getParent()) << " ***\n";
836  dumpSchedule();
837  dbgs() << '\n';
838  });
839 }
840 
841 /// Apply each ScheduleDAGMutation step in order.
843  for (auto &m : Mutations)
844  m->apply(this);
845 }
846 
847 void ScheduleDAGMI::
849  SmallVectorImpl<SUnit*> &BotRoots) {
850  for (SUnit &SU : SUnits) {
851  assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
852 
853  // Order predecessors so DFSResult follows the critical path.
854  SU.biasCriticalPath();
855 
856  // A SUnit is ready to top schedule if it has no predecessors.
857  if (!SU.NumPredsLeft)
858  TopRoots.push_back(&SU);
859  // A SUnit is ready to bottom schedule if it has no successors.
860  if (!SU.NumSuccsLeft)
861  BotRoots.push_back(&SU);
862  }
864 }
865 
866 /// Identify DAG roots and set up scheduler queues.
868  ArrayRef<SUnit*> BotRoots) {
869  NextClusterSucc = nullptr;
870  NextClusterPred = nullptr;
871 
872  // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
873  //
874  // Nodes with unreleased weak edges can still be roots.
875  // Release top roots in forward order.
876  for (SUnit *SU : TopRoots)
877  SchedImpl->releaseTopNode(SU);
878 
879  // Release bottom roots in reverse order so the higher priority nodes appear
880  // first. This is more natural and slightly more efficient.
882  I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
883  SchedImpl->releaseBottomNode(*I);
884  }
885 
888 
889  SchedImpl->registerRoots();
890 
891  // Advance past initial DebugValues.
894 }
895 
896 /// Update scheduler queues after scheduling an instruction.
897 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
898  // Release dependent instructions for scheduling.
899  if (IsTopNode)
900  releaseSuccessors(SU);
901  else
903 
904  SU->isScheduled = true;
905 }
906 
907 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
909  // If first instruction was a DBG_VALUE then put it back.
910  if (FirstDbgValue) {
913  }
914 
915  for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
916  DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
917  std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
918  MachineInstr *DbgValue = P.first;
919  MachineBasicBlock::iterator OrigPrevMI = P.second;
920  if (&*RegionBegin == DbgValue)
921  ++RegionBegin;
922  BB->splice(std::next(OrigPrevMI), BB, DbgValue);
923  if (RegionEnd != BB->end() && OrigPrevMI == &*RegionEnd)
925  }
926 }
927 
928 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
930  for (MachineInstr &MI : *this) {
931  if (SUnit *SU = getSUnit(&MI))
932  dumpNode(*SU);
933  else
934  dbgs() << "Missing SUnit\n";
935  }
936 }
937 #endif
938 
939 //===----------------------------------------------------------------------===//
940 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
941 // preservation.
942 //===----------------------------------------------------------------------===//
943 
945  delete DFSResult;
946 }
947 
949  const MachineInstr &MI = *SU.getInstr();
950  for (const MachineOperand &MO : MI.operands()) {
951  if (!MO.isReg())
952  continue;
953  if (!MO.readsReg())
954  continue;
955  if (TrackLaneMasks && !MO.isUse())
956  continue;
957 
958  Register Reg = MO.getReg();
960  continue;
961 
962  // Ignore re-defs.
963  if (TrackLaneMasks) {
964  bool FoundDef = false;
965  for (const MachineOperand &MO2 : MI.operands()) {
966  if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
967  FoundDef = true;
968  break;
969  }
970  }
971  if (FoundDef)
972  continue;
973  }
974 
975  // Record this local VReg use.
977  for (; UI != VRegUses.end(); ++UI) {
978  if (UI->SU == &SU)
979  break;
980  }
981  if (UI == VRegUses.end())
983  }
984 }
985 
986 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
987 /// crossing a scheduling boundary. [begin, end) includes all instructions in
988 /// the region, including the boundary itself and single-instruction regions
989 /// that don't get scheduled.
993  unsigned regioninstrs)
994 {
995  // ScheduleDAGMI initializes SchedImpl's per-region policy.
996  ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
997 
998  // For convenience remember the end of the liveness region.
999  LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
1000 
1002 
1003  ShouldTrackPressure = SchedImpl->shouldTrackPressure();
1004  ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
1005 
1007  "ShouldTrackLaneMasks requires ShouldTrackPressure");
1008 }
1009 
1010 // Set up the register pressure trackers for the top scheduled and bottom
1011 // scheduled regions.
1013  VRegUses.clear();
1015  for (SUnit &SU : SUnits)
1016  collectVRegUses(SU);
1017 
1019  ShouldTrackLaneMasks, false);
1021  ShouldTrackLaneMasks, false);
1022 
1023  // Close the RPTracker to finalize live ins.
1025 
1027 
1028  // Initialize the live ins and live outs.
1031 
1032  // Close one end of the tracker so we can call
1033  // getMaxUpward/DownwardPressureDelta before advancing across any
1034  // instructions. This converts currently live regs into live ins/outs.
1037 
1039  if (!BotRPTracker.getLiveThru().empty()) {
1041  LLVM_DEBUG(dbgs() << "Live Thru: ";
1043  };
1044 
1045  // For each live out vreg reduce the pressure change associated with other
1046  // uses of the same vreg below the live-out reaching def.
1048 
1049  // Account for liveness generated by the region boundary.
1050  if (LiveRegionEnd != RegionEnd) {
1052  BotRPTracker.recede(&LiveUses);
1053  updatePressureDiffs(LiveUses);
1054  }
1055 
1056  LLVM_DEBUG(dbgs() << "Top Pressure:\n";
1058  dbgs() << "Bottom Pressure:\n";
1060 
1062  (RegionEnd->isDebugInstr() &&
1064  "Can't find the region bottom");
1065 
1066  // Cache the list of excess pressure sets in this region. This will also track
1067  // the max pressure in the scheduled code for these sets.
1068  RegionCriticalPSets.clear();
1069  const std::vector<unsigned> &RegionPressure =
1071  for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
1072  unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
1073  if (RegionPressure[i] > Limit) {
1074  LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
1075  << " Actual " << RegionPressure[i] << "\n");
1076  RegionCriticalPSets.push_back(PressureChange(i));
1077  }
1078  }
1079  LLVM_DEBUG(dbgs() << "Excess PSets: ";
1080  for (const PressureChange &RCPS
1082  << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
1083  dbgs() << "\n");
1084 }
1085 
1088  const std::vector<unsigned> &NewMaxPressure) {
1089  const PressureDiff &PDiff = getPressureDiff(SU);
1090  unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
1091  for (const PressureChange &PC : PDiff) {
1092  if (!PC.isValid())
1093  break;
1094  unsigned ID = PC.getPSet();
1095  while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
1096  ++CritIdx;
1097  if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
1098  if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
1099  && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
1100  RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
1101  }
1102  unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
1103  if (NewMaxPressure[ID] >= Limit - 2) {
1104  LLVM_DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": "
1105  << NewMaxPressure[ID]
1106  << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
1107  << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
1108  << " livethru)\n");
1109  }
1110  }
1111 }
1112 
1113 /// Update the PressureDiff array for liveness after scheduling this
1114 /// instruction.
1116  ArrayRef<RegisterMaskPair> LiveUses) {
1117  for (const RegisterMaskPair &P : LiveUses) {
1118  Register Reg = P.RegUnit;
1119  /// FIXME: Currently assuming single-use physregs.
1121  continue;
1122 
1123  if (ShouldTrackLaneMasks) {
1124  // If the register has just become live then other uses won't change
1125  // this fact anymore => decrement pressure.
1126  // If the register has just become dead then other uses make it come
1127  // back to life => increment pressure.
1128  bool Decrement = P.LaneMask.any();
1129 
1130  for (const VReg2SUnit &V2SU
1132  SUnit &SU = *V2SU.SU;
1133  if (SU.isScheduled || &SU == &ExitSU)
1134  continue;
1135 
1136  PressureDiff &PDiff = getPressureDiff(&SU);
1137  PDiff.addPressureChange(Reg, Decrement, &MRI);
1138  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU.NodeNum << ") "
1139  << printReg(Reg, TRI) << ':'
1140  << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
1141  dbgs() << " to "; PDiff.dump(*TRI););
1142  }
1143  } else {
1144  assert(P.LaneMask.any());
1145  LLVM_DEBUG(dbgs() << " LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
1146  // This may be called before CurrentBottom has been initialized. However,
1147  // BotRPTracker must have a valid position. We want the value live into the
1148  // instruction or live out of the block, so ask for the previous
1149  // instruction's live-out.
1150  const LiveInterval &LI = LIS->getInterval(Reg);
1151  VNInfo *VNI;
1154  if (I == BB->end())
1155  VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1156  else {
1158  VNI = LRQ.valueIn();
1159  }
1160  // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
1161  assert(VNI && "No live value at use.");
1162  for (const VReg2SUnit &V2SU
1164  SUnit *SU = V2SU.SU;
1165  // If this use comes before the reaching def, it cannot be a last use,
1166  // so decrease its pressure change.
1167  if (!SU->isScheduled && SU != &ExitSU) {
1168  LiveQueryResult LRQ =
1169  LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
1170  if (LRQ.valueIn() == VNI) {
1171  PressureDiff &PDiff = getPressureDiff(SU);
1172  PDiff.addPressureChange(Reg, true, &MRI);
1173  LLVM_DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") "
1174  << *SU->getInstr();
1175  dbgs() << " to "; PDiff.dump(*TRI););
1176  }
1177  }
1178  }
1179  }
1180  }
1181 }
1182 
1184 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1185  if (EntrySU.getInstr() != nullptr)
1187  for (const SUnit &SU : SUnits) {
1188  dumpNodeAll(SU);
1189  if (ShouldTrackPressure) {
1190  dbgs() << " Pressure Diff : ";
1191  getPressureDiff(&SU).dump(*TRI);
1192  }
1193  dbgs() << " Single Issue : ";
1194  if (SchedModel.mustBeginGroup(SU.getInstr()) &&
1195  SchedModel.mustEndGroup(SU.getInstr()))
1196  dbgs() << "true;";
1197  else
1198  dbgs() << "false;";
1199  dbgs() << '\n';
1200  }
1201  if (ExitSU.getInstr() != nullptr)
1203 #endif
1204 }
1205 
1206 /// schedule - Called back from MachineScheduler::runOnMachineFunction
1207 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
1208 /// only includes instructions that have DAG nodes, not scheduling boundaries.
1209 ///
1210 /// This is a skeletal driver, with all the functionality pushed into helpers,
1211 /// so that it can be easily extended by experimental schedulers. Generally,
1212 /// implementing MachineSchedStrategy should be sufficient to implement a new
1213 /// scheduling algorithm. However, if a scheduler further subclasses
1214 /// ScheduleDAGMILive then it will want to override this virtual method in order
1215 /// to update any specialized state.
1217  LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
1218  LLVM_DEBUG(SchedImpl->dumpPolicy());
1220 
1221  postprocessDAG();
1222 
1223  SmallVector<SUnit*, 8> TopRoots, BotRoots;
1224  findRootsAndBiasEdges(TopRoots, BotRoots);
1225 
1226  // Initialize the strategy before modifying the DAG.
1227  // This may initialize a DFSResult to be used for queue priority.
1228  SchedImpl->initialize(this);
1229 
1230  LLVM_DEBUG(dump());
1231  if (PrintDAGs) dump();
1232  if (ViewMISchedDAGs) viewGraph();
1233 
1234  // Initialize ready queues now that the DAG and priority data are finalized.
1235  initQueues(TopRoots, BotRoots);
1236 
1237  bool IsTopNode = false;
1238  while (true) {
1239  LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
1240  SUnit *SU = SchedImpl->pickNode(IsTopNode);
1241  if (!SU) break;
1242 
1243  assert(!SU->isScheduled && "Node already scheduled");
1244  if (!checkSchedLimit())
1245  break;
1246 
1247  scheduleMI(SU, IsTopNode);
1248 
1249  if (DFSResult) {
1250  unsigned SubtreeID = DFSResult->getSubtreeID(SU);
1251  if (!ScheduledTrees.test(SubtreeID)) {
1252  ScheduledTrees.set(SubtreeID);
1253  DFSResult->scheduleTree(SubtreeID);
1254  SchedImpl->scheduleTree(SubtreeID);
1255  }
1256  }
1257 
1258  // Notify the scheduling strategy after updating the DAG.
1259  SchedImpl->schedNode(SU, IsTopNode);
1260 
1261  updateQueues(SU, IsTopNode);
1262  }
1263  assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
1264 
1265  placeDebugValues();
1266 
1267  LLVM_DEBUG({
1268  dbgs() << "*** Final schedule for "
1269  << printMBBReference(*begin()->getParent()) << " ***\n";
1270  dumpSchedule();
1271  dbgs() << '\n';
1272  });
1273 }
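// As the comment above notes, a new scheduling algorithm usually only needs a
// MachineSchedStrategy. A minimal top-down strategy sketch (illustrative only;
// a practical strategy would also override initPolicy() and consult the
// pressure trackers) overrides the pure virtual hooks:
//
//   struct TrivialSchedStrategy : public MachineSchedStrategy {
//     std::vector<SUnit *> Ready; // nodes whose predecessors are all scheduled
//     void initialize(ScheduleDAGMI *DAG) override { Ready.clear(); }
//     SUnit *pickNode(bool &IsTopNode) override {
//       IsTopNode = true;
//       if (Ready.empty())
//         return nullptr;              // nothing left; scheduling is done
//       SUnit *SU = Ready.back();
//       Ready.pop_back();
//       return SU;
//     }
//     void schedNode(SUnit *SU, bool IsTopNode) override {}
//     void releaseTopNode(SUnit *SU) override { Ready.push_back(SU); }
//     void releaseBottomNode(SUnit *SU) override {} // top-down only
//   };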
1274 
1275 /// Build the DAG and set up three register pressure trackers.
1277  if (!ShouldTrackPressure) {
1278  RPTracker.reset();
1279  RegionCriticalPSets.clear();
1281  return;
1282  }
1283 
1284  // Initialize the register pressure tracker used by buildSchedGraph.
1286  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
1287 
1288  // Account for liveness generated by the region boundary.
1289  if (LiveRegionEnd != RegionEnd)
1290  RPTracker.recede();
1291 
1292  // Build the DAG, and compute current register pressure.
1294 
1295  // Initialize top/bottom trackers after computing region pressure.
1296  initRegPressure();
1297 }
1298 
1300  if (!DFSResult)
1301  DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
1302  DFSResult->clear();
1304  DFSResult->resize(SUnits.size());
1307 }
1308 
1309 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
1310 /// only provides the critical path for single block loops. To handle loops that
1311 /// span blocks, we could use the vreg path latencies provided by
1312 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
1313 /// available for use in the scheduler.
1314 ///
1315 /// The cyclic path estimation identifies a def-use pair that crosses the back
1316 /// edge and considers the depth and height of the nodes. For example, consider
1317 /// the following instruction sequence where each instruction has unit latency
1318 /// and defines an eponymous virtual register:
1319 ///
1320 /// a->b(a,c)->c(b)->d(c)->exit
1321 ///
1322 /// The cyclic critical path is two cycles: b->c->b
1323 /// The acyclic critical path is four cycles: a->b->c->d->exit
1324 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
1325 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
1326 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
1327 /// LiveInDepth = depth(b) = len(a->b) = 1
1328 ///
1329 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
1330 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
1331 /// CyclicCriticalPath = min(2, 2) = 2
1332 ///
1333 /// This could be relevant to PostRA scheduling, but is currently implemented
1334 /// assuming LiveIntervals.
1336  // This only applies to single-block loops.
1337  if (!BB->isSuccessor(BB))
1338  return 0;
1339 
1340  unsigned MaxCyclicLatency = 0;
1341  // Visit each live out vreg def to find def/use pairs that cross iterations.
1343  Register Reg = P.RegUnit;
1345  continue;
1346  const LiveInterval &LI = LIS->getInterval(Reg);
1347  const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
1348  if (!DefVNI)
1349  continue;
1350 
1352  const SUnit *DefSU = getSUnit(DefMI);
1353  if (!DefSU)
1354  continue;
1355 
1356  unsigned LiveOutHeight = DefSU->getHeight();
1357  unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
1358  // Visit all local users of the vreg def.
1359  for (const VReg2SUnit &V2SU
1361  SUnit *SU = V2SU.SU;
1362  if (SU == &ExitSU)
1363  continue;
1364 
1365  // Only consider uses of the phi.
1367  if (!LRQ.valueIn()->isPHIDef())
1368  continue;
1369 
1370  // Assume that a path spanning two iterations is a cycle, which could
1371  // overestimate in strange cases. This allows cyclic latency to be
1372  // estimated as the minimum slack of the vreg's depth or height.
1373  unsigned CyclicLatency = 0;
1374  if (LiveOutDepth > SU->getDepth())
1375  CyclicLatency = LiveOutDepth - SU->getDepth();
1376 
1377  unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
1378  if (LiveInHeight > LiveOutHeight) {
1379  if (LiveInHeight - LiveOutHeight < CyclicLatency)
1380  CyclicLatency = LiveInHeight - LiveOutHeight;
1381  } else
1382  CyclicLatency = 0;
1383 
1384  LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
1385  << SU->NodeNum << ") = " << CyclicLatency << "c\n");
1386  if (CyclicLatency > MaxCyclicLatency)
1387  MaxCyclicLatency = CyclicLatency;
1388  }
1389  }
1390  LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
1391  return MaxCyclicLatency;
1392 }
1393 
1394 /// Release ExitSU predecessors and set up scheduler queues. Re-position
1395 /// the Top RP tracker in case the region beginning has changed.
1397  ArrayRef<SUnit*> BotRoots) {
1398  ScheduleDAGMI::initQueues(TopRoots, BotRoots);
1399  if (ShouldTrackPressure) {
1400  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
1402  }
1403 }
1404 
1405 /// Move an instruction and update register pressure.
1406 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
1407  // Move the instruction to its new location in the instruction stream.
1408  MachineInstr *MI = SU->getInstr();
1409 
1410  if (IsTopNode) {
1411  assert(SU->isTopReady() && "node still has unscheduled dependencies");
1412  if (&*CurrentTop == MI)
1414  else {
1417  }
1418 
1419  if (ShouldTrackPressure) {
1420  // Update top scheduled pressure.
1421  RegisterOperands RegOpers;
1422  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1423  if (ShouldTrackLaneMasks) {
1424  // Adjust liveness and add missing dead+read-undef flags.
1425  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1426  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1427  } else {
1428  // Adjust for missing dead-def flags.
1429  RegOpers.detectDeadDefs(*MI, *LIS);
1430  }
1431 
1432  TopRPTracker.advance(RegOpers);
1433  assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
1434  LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
1436 
1438  }
1439  } else {
1440  assert(SU->isBottomReady() && "node still has unscheduled dependencies");
1441  MachineBasicBlock::iterator priorII =
1443  if (&*priorII == MI)
1444  CurrentBottom = priorII;
1445  else {
1446  if (&*CurrentTop == MI) {
1447  CurrentTop = nextIfDebug(++CurrentTop, priorII);
1449  }
1451  CurrentBottom = MI;
1453  }
1454  if (ShouldTrackPressure) {
1455  RegisterOperands RegOpers;
1456  RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
1457  if (ShouldTrackLaneMasks) {
1458  // Adjust liveness and add missing dead+read-undef flags.
1459  SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
1460  RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
1461  } else {
1462  // Adjust for missing dead-def flags.
1463  RegOpers.detectDeadDefs(*MI, *LIS);
1464  }
1465 
1469  BotRPTracker.recede(RegOpers, &LiveUses);
1470  assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
1471  LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
1473 
1475  updatePressureDiffs(LiveUses);
1476  }
1477  }
1478 }
1479 
1480 //===----------------------------------------------------------------------===//
1481 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
1482 //===----------------------------------------------------------------------===//
1483 
1484 namespace {
1485 
1486 /// Post-process the DAG to create cluster edges between neighboring
1487 /// loads or between neighboring stores.
1488 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
1489  struct MemOpInfo {
1490  SUnit *SU;
1491  SmallVector<const MachineOperand *, 4> BaseOps;
1492  int64_t Offset;
1493  unsigned Width;
1494 
1495  MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
1496  int64_t Offset, unsigned Width)
1497  : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
1498  Width(Width) {}
1499 
1500  static bool Compare(const MachineOperand *const &A,
1501  const MachineOperand *const &B) {
1502  if (A->getType() != B->getType())
1503  return A->getType() < B->getType();
1504  if (A->isReg())
1505  return A->getReg() < B->getReg();
1506  if (A->isFI()) {
1507  const MachineFunction &MF = *A->getParent()->getParent()->getParent();
1508  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
1509  bool StackGrowsDown = TFI.getStackGrowthDirection() ==
1511  return StackGrowsDown ? A->getIndex() > B->getIndex()
1512  : A->getIndex() < B->getIndex();
1513  }
1514 
1515  llvm_unreachable("MemOpClusterMutation only supports register or frame "
1516  "index bases.");
1517  }
1518 
1519  bool operator<(const MemOpInfo &RHS) const {
1520  // FIXME: Don't compare everything twice. Maybe use C++20 three way
1521  // comparison instead when it's available.
1522  if (std::lexicographical_compare(BaseOps.begin(), BaseOps.end(),
1523  RHS.BaseOps.begin(), RHS.BaseOps.end(),
1524  Compare))
1525  return true;
1526  if (std::lexicographical_compare(RHS.BaseOps.begin(), RHS.BaseOps.end(),
1527  BaseOps.begin(), BaseOps.end(), Compare))
1528  return false;
1529  if (Offset != RHS.Offset)
1530  return Offset < RHS.Offset;
1531  return SU->NodeNum < RHS.SU->NodeNum;
1532  }
1533  };
1534 
1535  const TargetInstrInfo *TII;
1536  const TargetRegisterInfo *TRI;
1537  bool IsLoad;
1538 
1539 public:
1540  BaseMemOpClusterMutation(const TargetInstrInfo *tii,
1541  const TargetRegisterInfo *tri, bool IsLoad)
1542  : TII(tii), TRI(tri), IsLoad(IsLoad) {}
1543 
1544  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1545 
1546 protected:
1547  void clusterNeighboringMemOps(ArrayRef<MemOpInfo> MemOps, bool FastCluster,
1548  ScheduleDAGInstrs *DAG);
1549  void collectMemOpRecords(std::vector<SUnit> &SUnits,
1550  SmallVectorImpl<MemOpInfo> &MemOpRecords);
1551  bool groupMemOps(ArrayRef<MemOpInfo> MemOps, ScheduleDAGInstrs *DAG,
1552  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups);
1553 };
1554 
1555 class StoreClusterMutation : public BaseMemOpClusterMutation {
1556 public:
1557  StoreClusterMutation(const TargetInstrInfo *tii,
1558  const TargetRegisterInfo *tri)
1559  : BaseMemOpClusterMutation(tii, tri, false) {}
1560 };
1561 
1562 class LoadClusterMutation : public BaseMemOpClusterMutation {
1563 public:
1564  LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
1565  : BaseMemOpClusterMutation(tii, tri, true) {}
1566 };
1567 
1568 } // end anonymous namespace
1569 
1570 namespace llvm {
1571 
1572 std::unique_ptr<ScheduleDAGMutation>
1574  const TargetRegisterInfo *TRI) {
1575  return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
1576  : nullptr;
1577 }
1578 
1579 std::unique_ptr<ScheduleDAGMutation>
1581  const TargetRegisterInfo *TRI) {
1582  return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
1583  : nullptr;
1584 }
1585 
1586 } // end namespace llvm
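// For reference, these factories are meant to be attached by a target's
// scheduler hook; a sketch (the surrounding factory function is illustrative,
// not part of this file; addMutation() simply ignores a null mutation):
//
//   ScheduleDAGInstrs *createMyTargetSchedWithClustering(MachineSchedContext *C) {
//     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
//     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
//     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
//     return DAG;
//   }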
1587 
1588 // Sort all the loads/stores first. Then, for each load/store, check the
1589 // following loads/stores one by one until the first non-dependent one is
1590 // reached, and call the target hook to see if they can be clustered.
1591 // If FastCluster is enabled, we assume that all the loads/stores have been
1592 // preprocessed and have no dependencies on each other.
1593 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
1594  ArrayRef<MemOpInfo> MemOpRecords, bool FastCluster,
1595  ScheduleDAGInstrs *DAG) {
1596  // Keep track of the current cluster length and bytes for each SUnit.
1597  DenseMap<unsigned, std::pair<unsigned, unsigned>> SUnit2ClusterInfo;
1598 
1599  // At this point, the `MemOpRecords` array must hold at least two mem ops.
1600  // Try to cluster the mem ops collected within the `MemOpRecords` array.
1601  for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
1602  // The decision to cluster mem ops is based on target-dependent logic.
1603  auto MemOpa = MemOpRecords[Idx];
1604 
1605  // Look for the next load/store with which to form a cluster.
1606  unsigned NextIdx = Idx + 1;
1607  for (; NextIdx < End; ++NextIdx)
1608  // Skip if MemOpb has been clustered already or has dependency with
1609  // MemOpa.
1610  if (!SUnit2ClusterInfo.count(MemOpRecords[NextIdx].SU->NodeNum) &&
1611  (FastCluster ||
1612  (!DAG->IsReachable(MemOpRecords[NextIdx].SU, MemOpa.SU) &&
1613  !DAG->IsReachable(MemOpa.SU, MemOpRecords[NextIdx].SU))))
1614  break;
1615  if (NextIdx == End)
1616  continue;
1617 
1618  auto MemOpb = MemOpRecords[NextIdx];
1619  unsigned ClusterLength = 2;
1620  unsigned CurrentClusterBytes = MemOpa.Width + MemOpb.Width;
1621  if (SUnit2ClusterInfo.count(MemOpa.SU->NodeNum)) {
1622  ClusterLength = SUnit2ClusterInfo[MemOpa.SU->NodeNum].first + 1;
1623  CurrentClusterBytes =
1624  SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
1625  }
1626 
1627  if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
1628  CurrentClusterBytes))
1629  continue;
1630 
1631  SUnit *SUa = MemOpa.SU;
1632  SUnit *SUb = MemOpb.SU;
1633  if (SUa->NodeNum > SUb->NodeNum)
1634  std::swap(SUa, SUb);
1635 
1636  // FIXME: Is this check really required?
1637  if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster)))
1638  continue;
1639 
1640  LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
1641  << SUb->NodeNum << ")\n");
1642  ++NumClustered;
1643 
1644  if (IsLoad) {
1645  // Copy successor edges from SUa to SUb. Interleaving computation
1646  // dependent on SUa can prevent load combining due to register reuse.
1647  // Predecessor edges do not need to be copied from SUb to SUa since
1648  // nearby loads should have effectively the same inputs.
1649  for (const SDep &Succ : SUa->Succs) {
1650  if (Succ.getSUnit() == SUb)
1651  continue;
1652  LLVM_DEBUG(dbgs() << " Copy Succ SU(" << Succ.getSUnit()->NodeNum
1653  << ")\n");
1654  DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
1655  }
1656  } else {
1657  // Copy predecessor edges from SUb to SUa so that the SUnits SUb depends
1658  // on cannot be scheduled in between SUb and SUa. Successor edges do not
1659  // need to be copied from SUa to SUb since nothing will depend on stores.
1660  // Note that we don't need to worry about memory dependencies here, as we
1661  // won't try to cluster mem ops that have any memory dependency on each
1662  // other.
1663  for (const SDep &Pred : SUb->Preds) {
1664  if (Pred.getSUnit() == SUa)
1665  continue;
1666  LLVM_DEBUG(dbgs() << " Copy Pred SU(" << Pred.getSUnit()->NodeNum
1667  << ")\n");
1668  DAG->addEdge(SUa, SDep(Pred.getSUnit(), SDep::Artificial));
1669  }
1670  }
1671 
1672  SUnit2ClusterInfo[MemOpb.SU->NodeNum] = {ClusterLength,
1673  CurrentClusterBytes};
1674 
1675  LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
1676  << ", Curr cluster bytes: " << CurrentClusterBytes
1677  << "\n");
1678  }
1679 }
1680 
1681 void BaseMemOpClusterMutation::collectMemOpRecords(
1682  std::vector<SUnit> &SUnits, SmallVectorImpl<MemOpInfo> &MemOpRecords) {
1683  for (auto &SU : SUnits) {
1684  if ((IsLoad && !SU.getInstr()->mayLoad()) ||
1685  (!IsLoad && !SU.getInstr()->mayStore()))
1686  continue;
1687 
1688  const MachineInstr &MI = *SU.getInstr();
1689  SmallVector<const MachineOperand *, 4> BaseOps;
1690  int64_t Offset;
1691  bool OffsetIsScalable;
1692  unsigned Width;
1693  if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
1694  OffsetIsScalable, Width, TRI)) {
1695  MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
1696 
1697  LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
1698  << Offset << ", OffsetIsScalable: " << OffsetIsScalable
1699  << ", Width: " << Width << "\n");
1700  }
1701 #ifndef NDEBUG
1702  for (auto *Op : BaseOps)
1703  assert(Op);
1704 #endif
1705  }
1706 }
1707 
1708 bool BaseMemOpClusterMutation::groupMemOps(
1710  DenseMap<unsigned, SmallVector<MemOpInfo, 32>> &Groups) {
1711  bool FastCluster =
1712  ForceFastCluster ||
1713  MemOps.size() * DAG->SUnits.size() / 1000 > FastClusterThreshold;
1714 
1715  for (const auto &MemOp : MemOps) {
1716  unsigned ChainPredID = DAG->SUnits.size();
1717  if (FastCluster) {
1718  for (const SDep &Pred : MemOp.SU->Preds) {
1719  // We only want to cluster mem ops that have the same ctrl (non-data)
1720  // pred, so that they have no ctrl dependency on each other. But for
1721  // store instrs, we can still cluster them if the pred is a load instr.
1722  if ((Pred.isCtrl() &&
1723  (IsLoad ||
1724  (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
1725  !Pred.isArtificial()) {
1726  ChainPredID = Pred.getSUnit()->NodeNum;
1727  break;
1728  }
1729  }
1730  } else
1731  ChainPredID = 0;
1732 
1733  Groups[ChainPredID].push_back(MemOp);
1734  }
1735  return FastCluster;
1736 }
1737 
1738 /// Callback from DAG postProcessing to create cluster edges for loads/stores.
1740  // Collect all the clusterable loads/stores
1741  SmallVector<MemOpInfo, 32> MemOpRecords;
1742  collectMemOpRecords(DAG->SUnits, MemOpRecords);
1743 
1744  if (MemOpRecords.size() < 2)
1745  return;
1746 
1747  // If the DAG is too complex, use a heuristic to put the loads/stores
1748  // without dependencies into the same group, to avoid a compile-time blow
1749  // up. Note that some fusion pairs may be lost as a result.
1751  bool FastCluster = groupMemOps(MemOpRecords, DAG, Groups);
1752 
1753  for (auto &Group : Groups) {
1754  // Sort the loads/stores so that we can stop clustering as early as
1755  // possible.
1756  llvm::sort(Group.second);
1757 
1758  // Try to cluster all the neighboring loads/stores.
1759  clusterNeighboringMemOps(Group.second, FastCluster, DAG);
1760  }
1761 }
1762 
1763 //===----------------------------------------------------------------------===//
1764 // CopyConstrain - DAG post-processing to encourage copy elimination.
1765 //===----------------------------------------------------------------------===//
1766 
1767 namespace {
1768 
1769 /// Post-process the DAG to create weak edges from all uses of a copy to
1770 /// the one use that defines the copy's source vreg, most likely an induction
1771 /// variable increment.
1772 class CopyConstrain : public ScheduleDAGMutation {
1773  // Transient state.
1774  SlotIndex RegionBeginIdx;
1775 
1776  // RegionEndIdx is the slot index of the last non-debug instruction in the
1777  // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
1778  SlotIndex RegionEndIdx;
1779 
1780 public:
1781  CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
1782 
1783  void apply(ScheduleDAGInstrs *DAGInstrs) override;
1784 
1785 protected:
1786  void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
1787 };
1788 
1789 } // end anonymous namespace
1790 
1791 namespace llvm {
1792 
1793 std::unique_ptr<ScheduleDAGMutation>
1795  const TargetRegisterInfo *TRI) {
1796  return std::make_unique<CopyConstrain>(TII, TRI);
1797 }
1798 
1799 } // end namespace llvm
1800 
1801 /// constrainLocalCopy handles two possibilities:
1802 /// 1) Local src:
1803 /// I0: = dst
1804 /// I1: src = ...
1805 /// I2: = dst
1806 /// I3: dst = src (copy)
1807 /// (create pred->succ edges I0->I1, I2->I1)
1808 ///
1809 /// 2) Local copy:
1810 /// I0: dst = src (copy)
1811 /// I1: = dst
1812 /// I2: src = ...
1813 /// I3: = dst
1814 /// (create pred->succ edges I1->I2, I3->I2)
1815 ///
1816 /// Although the MachineScheduler is currently constrained to single blocks,
1817 /// this algorithm should handle extended blocks. An EBB is a set of
1818 /// contiguously numbered blocks such that the previous block in the EBB is
1819 /// always the single predecessor.
1820 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
1821  LiveIntervals *LIS = DAG->getLIS();
1822  MachineInstr *Copy = CopySU->getInstr();
1823 
1824  // Check for pure vreg copies.
1825  const MachineOperand &SrcOp = Copy->getOperand(1);
1826  Register SrcReg = SrcOp.getReg();
1827  if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
1828  return;
1829 
1830  const MachineOperand &DstOp = Copy->getOperand(0);
1831  Register DstReg = DstOp.getReg();
1832  if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
1833  return;
1834 
1835  // Check if either the dest or source is local. If it's live across a back
1836  // edge, it's not local. Note that if both vregs are live across the back
 1837  // edge, we cannot successfully constrain the copy without cyclic scheduling.
1838  // If both the copy's source and dest are local live intervals, then we
1839  // should treat the dest as the global for the purpose of adding
1840  // constraints. This adds edges from source's other uses to the copy.
1841  unsigned LocalReg = SrcReg;
1842  unsigned GlobalReg = DstReg;
1843  LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
1844  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
1845  LocalReg = DstReg;
1846  GlobalReg = SrcReg;
1847  LocalLI = &LIS->getInterval(LocalReg);
1848  if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
1849  return;
1850  }
1851  LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
1852 
1853  // Find the global segment after the start of the local LI.
1854  LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
1855  // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
1856  // local live range. We could create edges from other global uses to the local
1857  // start, but the coalescer should have already eliminated these cases, so
 1858  // don't bother dealing with them.
1859  if (GlobalSegment == GlobalLI->end())
1860  return;
1861 
1862  // If GlobalSegment is killed at the LocalLI->start, the call to find()
1863  // returned the next global segment. But if GlobalSegment overlaps with
1864  // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
1865  // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
1866  if (GlobalSegment->contains(LocalLI->beginIndex()))
1867  ++GlobalSegment;
1868 
1869  if (GlobalSegment == GlobalLI->end())
1870  return;
1871 
1872  // Check if GlobalLI contains a hole in the vicinity of LocalLI.
1873  if (GlobalSegment != GlobalLI->begin()) {
1874  // Two address defs have no hole.
1875  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
1876  GlobalSegment->start)) {
1877  return;
1878  }
1879  // If the prior global segment may be defined by the same two-address
 1880  // instruction that also defines LocalLI, then we can't make a hole here.
1881  if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
1882  LocalLI->beginIndex())) {
1883  return;
1884  }
1885  // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
1886  // it would be a disconnected component in the live range.
1887  assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
1888  "Disconnected LRG within the scheduling region.");
1889  }
1890  MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
1891  if (!GlobalDef)
1892  return;
1893 
1894  SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
1895  if (!GlobalSU)
1896  return;
1897 
1898  // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
1899  // constraining the uses of the last local def to precede GlobalDef.
1900  SmallVector<SUnit*,8> LocalUses;
1901  const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
1902  MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
1903  SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
1904  for (const SDep &Succ : LastLocalSU->Succs) {
1905  if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
1906  continue;
1907  if (Succ.getSUnit() == GlobalSU)
1908  continue;
1909  if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
1910  return;
1911  LocalUses.push_back(Succ.getSUnit());
1912  }
1913  // Open the top of the GlobalLI hole by constraining any earlier global uses
1914  // to precede the start of LocalLI.
1915  SmallVector<SUnit*,8> GlobalUses;
1916  MachineInstr *FirstLocalDef =
1917  LIS->getInstructionFromIndex(LocalLI->beginIndex());
1918  SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
1919  for (const SDep &Pred : GlobalSU->Preds) {
1920  if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
1921  continue;
1922  if (Pred.getSUnit() == FirstLocalSU)
1923  continue;
1924  if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
1925  return;
1926  GlobalUses.push_back(Pred.getSUnit());
1927  }
1928  LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
1929  // Add the weak edges.
1930  for (SUnit *LU : LocalUses) {
1931  LLVM_DEBUG(dbgs() << " Local use SU(" << LU->NodeNum << ") -> SU("
1932  << GlobalSU->NodeNum << ")\n");
1933  DAG->addEdge(GlobalSU, SDep(LU, SDep::Weak));
1934  }
1935  for (SUnit *GU : GlobalUses) {
1936  LLVM_DEBUG(dbgs() << " Global use SU(" << GU->NodeNum << ") -> SU("
1937  << FirstLocalSU->NodeNum << ")\n");
1938  DAG->addEdge(FirstLocalSU, SDep(GU, SDep::Weak));
1939  }
1940 }
1941 
1942 /// Callback from DAG postProcessing to create weak edges to encourage
1943 /// copy elimination.
1944 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
1945  ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
1946  assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
1947 
1948  MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
1949  if (FirstPos == DAG->end())
1950  return;
1951  RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
1952  RegionEndIdx = DAG->getLIS()->getInstructionIndex(
1953  *priorNonDebug(DAG->end(), DAG->begin()));
1954 
1955  for (SUnit &SU : DAG->SUnits) {
1956  if (!SU.getInstr()->isCopy())
1957  continue;
1958 
1959  constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
1960  }
1961 }
1962 
1963 //===----------------------------------------------------------------------===//
1964 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
1965 // and possibly other custom schedulers.
1966 //===----------------------------------------------------------------------===//
1967 
1968 static const unsigned InvalidCycle = ~0U;
1969 
1971 
1972 /// Given a Count of resource usage and a Latency value, return true if a
1973 /// SchedBoundary becomes resource limited.
1974 /// If we are checking after scheduling a node, we should return true when
1975 /// we just reach the resource limit.
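/// A worked example with hypothetical numbers: with LFactor = 2, Count = 7 and
/// Latency = 3, ResCntFactor = 7 - (3 * 2) = 1. After scheduling a node the
/// zone counts as resource limited only if 1 >= 2 (false); before scheduling,
/// only if 1 > 2 (also false), so the zone is still latency limited.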
1976 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
1977  unsigned Latency, bool AfterSchedNode) {
1978  int ResCntFactor = (int)(Count - (Latency * LFactor));
1979  if (AfterSchedNode)
1980  return ResCntFactor >= (int)LFactor;
1981  else
1982  return ResCntFactor > (int)LFactor;
1983 }
1984 
1986  // A new HazardRec is created for each DAG and owned by SchedBoundary.
 1988  // Destroying and reconstructing it is very expensive though, so keep
 1989  // invalid, placeholder HazardRecs.
1989  if (HazardRec && HazardRec->isEnabled()) {
1990  delete HazardRec;
1991  HazardRec = nullptr;
1992  }
1993  Available.clear();
1994  Pending.clear();
1995  CheckPending = false;
1996  CurrCycle = 0;
1997  CurrMOps = 0;
1998  MinReadyCycle = std::numeric_limits<unsigned>::max();
1999  ExpectedLatency = 0;
2000  DependentLatency = 0;
2001  RetiredMOps = 0;
2002  MaxExecutedResCount = 0;
2003  ZoneCritResIdx = 0;
2004  IsResourceLimited = false;
2005  ReservedCycles.clear();
2006  ReservedCyclesIndex.clear();
2007  ResourceGroupSubUnitMasks.clear();
2008 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2009  // Track the maximum number of stall cycles that could arise either from the
2010  // latency of a DAG edge or the number of cycles that a processor resource is
2011  // reserved (SchedBoundary::ReservedCycles).
2012  MaxObservedStall = 0;
2013 #endif
2014  // Reserve a zero-count for invalid CritResIdx.
2015  ExecutedResCounts.resize(1);
2016  assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
2017 }
2018 
2019 void SchedRemainder::
2020 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
2021  reset();
2022  if (!SchedModel->hasInstrSchedModel())
2023  return;
2025  for (SUnit &SU : DAG->SUnits) {
2026  const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
2027  RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
2028  * SchedModel->getMicroOpFactor();
2030  PI = SchedModel->getWriteProcResBegin(SC),
2031  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2032  unsigned PIdx = PI->ProcResourceIdx;
2033  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2034  RemainingCounts[PIdx] += (Factor * PI->Cycles);
2035  }
2036  }
2037 }
2038 
2039 void SchedBoundary::
2041  reset();
2042  DAG = dag;
2043  SchedModel = smodel;
2044  Rem = rem;
2045  if (SchedModel->hasInstrSchedModel()) {
2046  unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
2047  ReservedCyclesIndex.resize(ResourceCount);
2048  ExecutedResCounts.resize(ResourceCount);
2049  ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0));
2050  unsigned NumUnits = 0;
2051 
2052  for (unsigned i = 0; i < ResourceCount; ++i) {
2053  ReservedCyclesIndex[i] = NumUnits;
2054  NumUnits += SchedModel->getProcResource(i)->NumUnits;
2055  if (isUnbufferedGroup(i)) {
2056  auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin;
2057  for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits;
2058  U != UE; ++U)
2059  ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]);
2060  }
2061  }
2062 
2063  ReservedCycles.resize(NumUnits, InvalidCycle);
2064  }
2065 }
2066 
2067 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
2068 /// these "soft stalls" differently than the hard stall cycles based on CPU
2069 /// resources and computed by checkHazard(). A fully in-order model
2070 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
2071 /// available for scheduling until they are ready. However, a weaker in-order
2072 /// model may use this for heuristics. For example, if a processor has in-order
2073 /// behavior when reading certain resources, this may come into play.
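/// For example (hypothetical cycle numbers): an unbuffered SU that becomes
/// ready at cycle 7 while the zone's CurrCycle is 5 reports a 2-cycle soft
/// stall; a buffered SU always reports 0.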
2075  if (!SU->isUnbuffered)
2076  return 0;
2077 
2078  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2079  if (ReadyCycle > CurrCycle)
2080  return ReadyCycle - CurrCycle;
2081  return 0;
2082 }
2083 
2084 /// Compute the next cycle at which the given processor resource unit
2085 /// can be scheduled.
2087  unsigned Cycles) {
2088  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
2089  // If this resource has never been used, always return cycle zero.
2090  if (NextUnreserved == InvalidCycle)
2091  return 0;
2092  // For bottom-up scheduling add the cycles needed for the current operation.
2093  if (!isTop())
2094  NextUnreserved += Cycles;
2095  return NextUnreserved;
2096 }
2097 
2098 /// Compute the next cycle at which the given processor resource can be
2099 /// scheduled. Returns the next cycle and the index of the processor resource
2100 /// instance in the reserved cycles vector.
2101 std::pair<unsigned, unsigned>
2103  unsigned Cycles) {
2104 
2105  unsigned MinNextUnreserved = InvalidCycle;
2106  unsigned InstanceIdx = 0;
2107  unsigned StartIndex = ReservedCyclesIndex[PIdx];
2108  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
2109  assert(NumberOfInstances > 0 &&
2110  "Cannot have zero instances of a ProcResource");
2111 
2112  if (isUnbufferedGroup(PIdx)) {
2113  // If any subunits are used by the instruction, report that the resource
2114  // group is available at 0, effectively removing the group record from
2115  // hazarding and basing the hazarding decisions on the subunit records.
2116  // Otherwise, choose the first available instance from among the subunits.
2117  // Specifications which assign cycles to both the subunits and the group or
2118  // which use an unbuffered group with buffered subunits will appear to
2119  // schedule strangely. In the first case, the additional cycles for the
2120  // group will be ignored. In the second, the group will be ignored
2121  // entirely.
2122  for (const MCWriteProcResEntry &PE :
2125  if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx])
2126  return std::make_pair(0u, StartIndex);
2127 
2128  auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin;
2129  for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
2130  unsigned NextUnreserved, NextInstanceIdx;
2131  std::tie(NextUnreserved, NextInstanceIdx) =
2132  getNextResourceCycle(SC, SubUnits[I], Cycles);
2133  if (MinNextUnreserved > NextUnreserved) {
2134  InstanceIdx = NextInstanceIdx;
2135  MinNextUnreserved = NextUnreserved;
2136  }
2137  }
2138  return std::make_pair(MinNextUnreserved, InstanceIdx);
2139  }
2140 
2141  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
2142  ++I) {
2143  unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
2144  if (MinNextUnreserved > NextUnreserved) {
2145  InstanceIdx = I;
2146  MinNextUnreserved = NextUnreserved;
2147  }
2148  }
2149  return std::make_pair(MinNextUnreserved, InstanceIdx);
2150 }
2151 
2152 /// Does this SU have a hazard within the current instruction group.
2153 ///
2154 /// The scheduler supports two modes of hazard recognition. The first is the
2155 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
2156 /// supports highly complicated in-order reservation tables
2157 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
2158 ///
2159 /// The second is a streamlined mechanism that checks for hazards based on
2160 /// simple counters that the scheduler itself maintains. It explicitly checks
2161 /// for instruction dispatch limitations, including the number of micro-ops that
2162 /// can dispatch per cycle.
2163 ///
2164 /// TODO: Also check whether the SU must start a new group.
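/// For example (hypothetical widths): with an issue width of 4 and CurrMOps
/// already at 3, an SU that decodes to 2 micro-ops is reported as a hazard
/// because 3 + 2 > 4, so it is deferred until the cycle advances.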
2166  if (HazardRec->isEnabled()
2168  return true;
2169  }
2170 
2171  unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
2172  if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
2173  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
2174  << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
2175  return true;
2176  }
2177 
2178  if (CurrMOps > 0 &&
2179  ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
2180  (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
2181  LLVM_DEBUG(dbgs() << " hazard: SU(" << SU->NodeNum << ") must "
2182  << (isTop() ? "begin" : "end") << " group\n");
2183  return true;
2184  }
2185 
2187  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2188  for (const MCWriteProcResEntry &PE :
2191  unsigned ResIdx = PE.ProcResourceIdx;
2192  unsigned Cycles = PE.Cycles;
2193  unsigned NRCycle, InstanceIdx;
2194  std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
2195  if (NRCycle > CurrCycle) {
2196 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
2197  MaxObservedStall = std::max(Cycles, MaxObservedStall);
2198 #endif
2199  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
2200  << SchedModel->getResourceName(ResIdx)
2201  << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2202  << "=" << NRCycle << "c\n");
2203  return true;
2204  }
2205  }
2206  }
2207  return false;
2208 }
2209 
2210 // Find the unscheduled node in ReadySUs with the highest latency.
2211 unsigned SchedBoundary::
2213  SUnit *LateSU = nullptr;
2214  unsigned RemLatency = 0;
2215  for (SUnit *SU : ReadySUs) {
2216  unsigned L = getUnscheduledLatency(SU);
2217  if (L > RemLatency) {
2218  RemLatency = L;
2219  LateSU = SU;
2220  }
2221  }
2222  if (LateSU) {
2223  LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
2224  << LateSU->NodeNum << ") " << RemLatency << "c\n");
2225  }
2226  return RemLatency;
2227 }
2228 
2229 // Count resources in this zone and the remaining unscheduled
2230 // instruction. Return the max count, scaled. Set OtherCritIdx to the critical
2231 // resource index, or zero if the zone is issue limited.
2232 unsigned SchedBoundary::
2233 getOtherResourceCount(unsigned &OtherCritIdx) {
2234  OtherCritIdx = 0;
2236  return 0;
2237 
2238  unsigned OtherCritCount = Rem->RemIssueCount
2239  + (RetiredMOps * SchedModel->getMicroOpFactor());
2240  LLVM_DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: "
2241  << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
2242  for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
2243  PIdx != PEnd; ++PIdx) {
2244  unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
2245  if (OtherCount > OtherCritCount) {
2246  OtherCritCount = OtherCount;
2247  OtherCritIdx = PIdx;
2248  }
2249  }
2250  if (OtherCritIdx) {
2251  LLVM_DEBUG(
2252  dbgs() << " " << Available.getName() << " + Remain CritRes: "
2253  << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
2254  << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
2255  }
2256  return OtherCritCount;
2257 }
2258 
2259 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
2260  unsigned Idx) {
2261  assert(SU->getInstr() && "Scheduled SUnit must have instr");
2262 
2263 #if LLVM_ENABLE_ABI_BREAKING_CHECKS
 2264  // ReadyCycle has been bumped up to the CurrCycle when this node was
2265  // scheduled, but CurrCycle may have been eagerly advanced immediately after
2266  // scheduling, so may now be greater than ReadyCycle.
2267  if (ReadyCycle > CurrCycle)
2268  MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
2269 #endif
2270 
2271  if (ReadyCycle < MinReadyCycle)
2272  MinReadyCycle = ReadyCycle;
2273 
2274  // Check for interlocks first. For the purpose of other heuristics, an
2275  // instruction that cannot issue appears as if it's not in the ReadyQueue.
2276  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
2277  bool HazardDetected = (!IsBuffered && ReadyCycle > CurrCycle) ||
2278  checkHazard(SU) || (Available.size() >= ReadyListLimit);
2279 
2280  if (!HazardDetected) {
2281  Available.push(SU);
2282 
2283  if (InPQueue)
2284  Pending.remove(Pending.begin() + Idx);
2285  return;
2286  }
2287 
2288  if (!InPQueue)
2289  Pending.push(SU);
2290 }
2291 
2292 /// Move the boundary of scheduled code by one cycle.
2293 void SchedBoundary::bumpCycle(unsigned NextCycle) {
2294  if (SchedModel->getMicroOpBufferSize() == 0) {
2295  assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
2296  "MinReadyCycle uninitialized");
2297  if (MinReadyCycle > NextCycle)
2298  NextCycle = MinReadyCycle;
2299  }
2300  // Update the current micro-ops, which will issue in the next cycle.
2301  unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
2302  CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
2303 
2304  // Decrement DependentLatency based on the next cycle.
2305  if ((NextCycle - CurrCycle) > DependentLatency)
2306  DependentLatency = 0;
2307  else
2308  DependentLatency -= (NextCycle - CurrCycle);
2309 
2310  if (!HazardRec->isEnabled()) {
2311  // Bypass HazardRec virtual calls.
2312  CurrCycle = NextCycle;
2313  } else {
2314  // Bypass getHazardType calls in case of long latency.
2315  for (; CurrCycle != NextCycle; ++CurrCycle) {
2316  if (isTop())
2318  else
2320  }
2321  }
2322  CheckPending = true;
2323  IsResourceLimited =
2325  getScheduledLatency(), true);
2326 
2327  LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
2328  << '\n');
2329 }
2330 
2331 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
2332  ExecutedResCounts[PIdx] += Count;
2333  if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
2334  MaxExecutedResCount = ExecutedResCounts[PIdx];
2335 }
2336 
2337 /// Add the given processor resource to this scheduled zone.
2338 ///
2339 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
2340 /// during which this resource is consumed.
2341 ///
2342 /// \return the next cycle at which the instruction may execute without
2343 /// oversubscribing resources.
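/// For example (hypothetical model values): a resource with factor 3 that is
/// consumed for 2 cycles contributes Count = 3 * 2 = 6 scaled units to
/// ExecutedResCounts; if that pushes its scaled count above the current
/// critical count, it becomes the zone's critical resource.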
2344 unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
2345  unsigned Cycles, unsigned NextCycle) {
2346  unsigned Factor = SchedModel->getResourceFactor(PIdx);
2347  unsigned Count = Factor * Cycles;
2348  LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
2349  << Cycles << "x" << Factor << "u\n");
2350 
2351  // Update Executed resources counts.
2352  incExecutedResources(PIdx, Count);
2353  assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
2354  Rem->RemainingCounts[PIdx] -= Count;
2355 
2356  // Check if this resource exceeds the current critical resource. If so, it
2357  // becomes the critical resource.
2358  if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
2359  ZoneCritResIdx = PIdx;
2360  LLVM_DEBUG(dbgs() << " *** Critical resource "
2361  << SchedModel->getResourceName(PIdx) << ": "
2363  << "c\n");
2364  }
2365  // For reserved resources, record the highest cycle using the resource.
2366  unsigned NextAvailable, InstanceIdx;
2367  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
2368  if (NextAvailable > CurrCycle) {
2369  LLVM_DEBUG(dbgs() << " Resource conflict: "
2370  << SchedModel->getResourceName(PIdx)
2371  << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
2372  << " reserved until @" << NextAvailable << "\n");
2373  }
2374  return NextAvailable;
2375 }
2376 
2377 /// Move the boundary of scheduled code by one SUnit.
2379  // Update the reservation table.
2380  if (HazardRec->isEnabled()) {
2381  if (!isTop() && SU->isCall) {
2382  // Calls are scheduled with their preceding instructions. For bottom-up
2383  // scheduling, clear the pipeline state before emitting.
2384  HazardRec->Reset();
2385  }
2387  // Scheduling an instruction may have made pending instructions available.
2388  CheckPending = true;
2389  }
2390  // checkHazard should prevent scheduling multiple instructions per cycle that
2391  // exceed the issue width.
2392  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2393  unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
2394  assert(
2395  (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
2396  "Cannot schedule this instruction's MicroOps in the current cycle.");
2397 
2398  unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
2399  LLVM_DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n");
2400 
2401  unsigned NextCycle = CurrCycle;
2402  switch (SchedModel->getMicroOpBufferSize()) {
2403  case 0:
2404  assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
2405  break;
2406  case 1:
2407  if (ReadyCycle > NextCycle) {
2408  NextCycle = ReadyCycle;
2409  LLVM_DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n");
2410  }
2411  break;
2412  default:
2413  // We don't currently model the OOO reorder buffer, so consider all
2414  // scheduled MOps to be "retired". We do loosely model in-order resource
2415  // latency. If this instruction uses an in-order resource, account for any
2416  // likely stall cycles.
2417  if (SU->isUnbuffered && ReadyCycle > NextCycle)
2418  NextCycle = ReadyCycle;
2419  break;
2420  }
2421  RetiredMOps += IncMOps;
2422 
2423  // Update resource counts and critical resource.
2424  if (SchedModel->hasInstrSchedModel()) {
2425  unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
2426  assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
2427  Rem->RemIssueCount -= DecRemIssue;
2428  if (ZoneCritResIdx) {
2429  // Scale scheduled micro-ops for comparing with the critical resource.
2430  unsigned ScaledMOps =
2431  RetiredMOps * SchedModel->getMicroOpFactor();
2432 
2433  // If scaled micro-ops are now more than the previous critical resource by
2434  // a full cycle, then micro-ops issue becomes critical.
2435  if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
2436  >= (int)SchedModel->getLatencyFactor()) {
2437  ZoneCritResIdx = 0;
2438  LLVM_DEBUG(dbgs() << " *** Critical resource NumMicroOps: "
2439  << ScaledMOps / SchedModel->getLatencyFactor()
2440  << "c\n");
2441  }
2442  }
2445  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2446  unsigned RCycle =
2447  countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
2448  if (RCycle > NextCycle)
2449  NextCycle = RCycle;
2450  }
2451  if (SU->hasReservedResource) {
2452  // For reserved resources, record the highest cycle using the resource.
2453  // For top-down scheduling, this is the cycle in which we schedule this
 2454  // instruction plus the number of cycles the operation reserves the
 2455  // resource. For bottom-up, it is simply the instruction's cycle.
2458  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2459  unsigned PIdx = PI->ProcResourceIdx;
2460  if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2461  unsigned ReservedUntil, InstanceIdx;
2462  std::tie(ReservedUntil, InstanceIdx) =
2463  getNextResourceCycle(SC, PIdx, 0);
2464  if (isTop()) {
2465  ReservedCycles[InstanceIdx] =
2466  std::max(ReservedUntil, NextCycle + PI->Cycles);
2467  } else
2468  ReservedCycles[InstanceIdx] = NextCycle;
2469  }
2470  }
2471  }
2472  }
2473  // Update ExpectedLatency and DependentLatency.
2474  unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
2475  unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
2476  if (SU->getDepth() > TopLatency) {
2477  TopLatency = SU->getDepth();
2478  LLVM_DEBUG(dbgs() << " " << Available.getName() << " TopLatency SU("
2479  << SU->NodeNum << ") " << TopLatency << "c\n");
2480  }
2481  if (SU->getHeight() > BotLatency) {
2482  BotLatency = SU->getHeight();
2483  LLVM_DEBUG(dbgs() << " " << Available.getName() << " BotLatency SU("
2484  << SU->NodeNum << ") " << BotLatency << "c\n");
2485  }
2486  // If we stall for any reason, bump the cycle.
2487  if (NextCycle > CurrCycle)
2488  bumpCycle(NextCycle);
2489  else
2490  // After updating ZoneCritResIdx and ExpectedLatency, check if we're
2491  // resource limited. If a stall occurred, bumpCycle does this.
2492  IsResourceLimited =
2494  getScheduledLatency(), true);
2495 
2496  // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
 2497  // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
2498  // one cycle. Since we commonly reach the max MOps here, opportunistically
2499  // bump the cycle to avoid uselessly checking everything in the readyQ.
2500  CurrMOps += IncMOps;
2501 
2502  // Bump the cycle count for issue group constraints.
 2503  // This must be done after NextCycle has been adjusted for all other stalls.
 2504  // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
 2505  // CurrCycle to X.
2506  if ((isTop() && SchedModel->mustEndGroup(SU->getInstr())) ||
2507  (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
2508  LLVM_DEBUG(dbgs() << " Bump cycle to " << (isTop() ? "end" : "begin")
2509  << " group\n");
2510  bumpCycle(++NextCycle);
2511  }
2512 
2513  while (CurrMOps >= SchedModel->getIssueWidth()) {
2514  LLVM_DEBUG(dbgs() << " *** Max MOps " << CurrMOps << " at cycle "
2515  << CurrCycle << '\n');
2516  bumpCycle(++NextCycle);
2517  }
2519 }
2520 
 2521 /// Release pending ready nodes into the available queue. This makes them
2522 /// visible to heuristics.
2524  // If the available queue is empty, it is safe to reset MinReadyCycle.
2525  if (Available.empty())
2526  MinReadyCycle = std::numeric_limits<unsigned>::max();
2527 
2528  // Check to see if any of the pending instructions are ready to issue. If
2529  // so, add them to the available queue.
2530  for (unsigned I = 0, E = Pending.size(); I < E; ++I) {
2531  SUnit *SU = *(Pending.begin() + I);
2532  unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
2533 
2534  if (ReadyCycle < MinReadyCycle)
2535  MinReadyCycle = ReadyCycle;
2536 
2537  if (Available.size() >= ReadyListLimit)
2538  break;
2539 
2540  releaseNode(SU, ReadyCycle, true, I);
2541  if (E != Pending.size()) {
2542  --I;
2543  --E;
2544  }
2545  }
2546  CheckPending = false;
2547 }
2548 
2549 /// Remove SU from the ready set for this boundary.
2551  if (Available.isInQueue(SU))
2553  else {
2554  assert(Pending.isInQueue(SU) && "bad ready count");
2555  Pending.remove(Pending.find(SU));
2556  }
2557 }
2558 
2559 /// If this queue only has one ready candidate, return it. As a side effect,
2560 /// defer any nodes that now hit a hazard, and advance the cycle until at least
2561 /// one node is ready. If multiple instructions are ready, return NULL.
2563  if (CheckPending)
2564  releasePending();
2565 
2566  // Defer any ready instrs that now have a hazard.
2567  for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
2568  if (checkHazard(*I)) {
2569  Pending.push(*I);
2570  I = Available.remove(I);
2571  continue;
2572  }
2573  ++I;
2574  }
2575  for (unsigned i = 0; Available.empty(); ++i) {
2576 // FIXME: Re-enable assert once PR20057 is resolved.
2577 // assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
2578 // "permanent hazard");
2579  (void)i;
2580  bumpCycle(CurrCycle + 1);
2581  releasePending();
2582  }
2583 
2584  LLVM_DEBUG(Pending.dump());
2586 
2587  if (Available.size() == 1)
2588  return *Available.begin();
2589  return nullptr;
2590 }
2591 
2592 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2593 // This is useful information to dump after bumpNode.
2594 // Note that the Queue contents are more useful before pickNodeFromQueue.
2596  unsigned ResFactor;
2597  unsigned ResCount;
2598  if (ZoneCritResIdx) {
2599  ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
2600  ResCount = getResourceCount(ZoneCritResIdx);
2601  } else {
2602  ResFactor = SchedModel->getMicroOpFactor();
2603  ResCount = RetiredMOps * ResFactor;
2604  }
2605  unsigned LFactor = SchedModel->getLatencyFactor();
2606  dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
2607  << " Retired: " << RetiredMOps;
2608  dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c";
2609  dbgs() << "\n Critical: " << ResCount / LFactor << "c, "
2610  << ResCount / ResFactor << " "
2611  << SchedModel->getResourceName(ZoneCritResIdx)
2612  << "\n ExpectedLatency: " << ExpectedLatency << "c\n"
2613  << (IsResourceLimited ? " - Resource" : " - Latency")
2614  << " limited.\n";
2615 }
2616 #endif
2617 
2618 //===----------------------------------------------------------------------===//
2619 // GenericScheduler - Generic implementation of MachineSchedStrategy.
2620 //===----------------------------------------------------------------------===//
2621 
2624  const TargetSchedModel *SchedModel) {
2626  return;
2627 
2628  const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
2631  PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
2632  if (PI->ProcResourceIdx == Policy.ReduceResIdx)
2633  ResDelta.CritResources += PI->Cycles;
2634  if (PI->ProcResourceIdx == Policy.DemandResIdx)
2635  ResDelta.DemandedResources += PI->Cycles;
2636  }
2637 }
2638 
2639 /// Compute remaining latency. We need this both to determine whether the
2640 /// overall schedule has become latency-limited and whether the instructions
2641 /// outside this zone are resource or latency limited.
2642 ///
2643 /// The "dependent" latency is updated incrementally during scheduling as the
2644 /// max height/depth of scheduled nodes minus the cycles since it was
2645 /// scheduled:
2646 /// DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone
2647 ///
2648 /// The "independent" latency is the max ready queue depth:
2649 /// ILat = max N.depth for N in Available|Pending
2650 ///
2651 /// RemainingLatency is the greater of independent and dependent latency.
2652 ///
2653 /// These computations are expensive, especially in DAGs with many edges, so
2654 /// only do them if necessary.
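/// For example (hypothetical latencies): if the zone's dependent latency is 4,
/// the largest remaining latency among nodes in Available is 9, and among
/// nodes in Pending it is 6, then RemainingLatency = max(4, 9, 6) = 9.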
2655 static unsigned computeRemLatency(SchedBoundary &CurrZone) {
2656  unsigned RemLatency = CurrZone.getDependentLatency();
2657  RemLatency = std::max(RemLatency,
2658  CurrZone.findMaxLatency(CurrZone.Available.elements()));
2659  RemLatency = std::max(RemLatency,
2660  CurrZone.findMaxLatency(CurrZone.Pending.elements()));
2661  return RemLatency;
2662 }
2663 
 2664 /// Returns true if the current cycle plus remaining latency is greater than
2665 /// the critical path in the scheduling region.
2666 bool GenericSchedulerBase::shouldReduceLatency(const CandPolicy &Policy,
2667  SchedBoundary &CurrZone,
2668  bool ComputeRemLatency,
2669  unsigned &RemLatency) const {
2670  // The current cycle is already greater than the critical path, so we are
2671  // already latency limited and don't need to compute the remaining latency.
2672  if (CurrZone.getCurrCycle() > Rem.CriticalPath)
2673  return true;
2674 
2675  // If we haven't scheduled anything yet, then we aren't latency limited.
2676  if (CurrZone.getCurrCycle() == 0)
2677  return false;
2678 
2679  if (ComputeRemLatency)
2680  RemLatency = computeRemLatency(CurrZone);
2681 
2682  return RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath;
2683 }
2684 
2685 /// Set the CandPolicy given a scheduling zone given the current resources and
2686 /// latencies inside and outside the zone.
2687 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
2688  SchedBoundary &CurrZone,
2689  SchedBoundary *OtherZone) {
2690  // Apply preemptive heuristics based on the total latency and resources
2691  // inside and outside this zone. Potential stalls should be considered before
2692  // following this policy.
2693 
2694  // Compute the critical resource outside the zone.
2695  unsigned OtherCritIdx = 0;
2696  unsigned OtherCount =
2697  OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
2698 
2699  bool OtherResLimited = false;
2700  unsigned RemLatency = 0;
2701  bool RemLatencyComputed = false;
2702  if (SchedModel->hasInstrSchedModel() && OtherCount != 0) {
2703  RemLatency = computeRemLatency(CurrZone);
2704  RemLatencyComputed = true;
2705  OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
2706  OtherCount, RemLatency, false);
2707  }
2708 
2709  // Schedule aggressively for latency in PostRA mode. We don't check for
2710  // acyclic latency during PostRA, and highly out-of-order processors will
2711  // skip PostRA scheduling.
2712  if (!OtherResLimited &&
2713  (IsPostRA || shouldReduceLatency(Policy, CurrZone, !RemLatencyComputed,
2714  RemLatency))) {
2715  Policy.ReduceLatency |= true;
2716  LLVM_DEBUG(dbgs() << " " << CurrZone.Available.getName()
2717  << " RemainingLatency " << RemLatency << " + "
2718  << CurrZone.getCurrCycle() << "c > CritPath "
2719  << Rem.CriticalPath << "\n");
2720  }
2721  // If the same resource is limiting inside and outside the zone, do nothing.
2722  if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
2723  return;
2724 
2725  LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
2726  dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: "
2727  << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
2728  } if (OtherResLimited) dbgs()
2729  << " RemainingLimit: "
2730  << SchedModel->getResourceName(OtherCritIdx) << "\n";
2731  if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
2732  << " Latency limited both directions.\n");
2733 
2734  if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
2735  Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
2736 
2737  if (OtherResLimited)
2738  Policy.DemandResIdx = OtherCritIdx;
2739 }
2740 
2741 #ifndef NDEBUG
2744  switch (Reason) {
2745  case NoCand: return "NOCAND ";
2746  case Only1: return "ONLY1 ";
2747  case PhysReg: return "PHYS-REG ";
2748  case RegExcess: return "REG-EXCESS";
2749  case RegCritical: return "REG-CRIT ";
2750  case Stall: return "STALL ";
2751  case Cluster: return "CLUSTER ";
2752  case Weak: return "WEAK ";
2753  case RegMax: return "REG-MAX ";
2754  case ResourceReduce: return "RES-REDUCE";
2755  case ResourceDemand: return "RES-DEMAND";
2756  case TopDepthReduce: return "TOP-DEPTH ";
2757  case TopPathReduce: return "TOP-PATH ";
2758  case BotHeightReduce:return "BOT-HEIGHT";
2759  case BotPathReduce: return "BOT-PATH ";
2760  case NextDefUse: return "DEF-USE ";
2761  case NodeOrder: return "ORDER ";
2762  };
2763  llvm_unreachable("Unknown reason!");
2764 }
2765 
2767  PressureChange P;
2768  unsigned ResIdx = 0;
2769  unsigned Latency = 0;
2770  switch (Cand.Reason) {
2771  default:
2772  break;
2773  case RegExcess:
2774  P = Cand.RPDelta.Excess;
2775  break;
2776  case RegCritical:
2777  P = Cand.RPDelta.CriticalMax;
2778  break;
2779  case RegMax:
2780  P = Cand.RPDelta.CurrentMax;
2781  break;
2782  case ResourceReduce:
2783  ResIdx = Cand.Policy.ReduceResIdx;
2784  break;
2785  case ResourceDemand:
2786  ResIdx = Cand.Policy.DemandResIdx;
2787  break;
2788  case TopDepthReduce:
2789  Latency = Cand.SU->getDepth();
2790  break;
2791  case TopPathReduce:
2792  Latency = Cand.SU->getHeight();
2793  break;
2794  case BotHeightReduce:
2795  Latency = Cand.SU->getHeight();
2796  break;
2797  case BotPathReduce:
2798  Latency = Cand.SU->getDepth();
2799  break;
2800  }
2801  dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
2802  if (P.isValid())
2803  dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
2804  << ":" << P.getUnitInc() << " ";
2805  else
2806  dbgs() << " ";
2807  if (ResIdx)
2808  dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
2809  else
2810  dbgs() << " ";
2811  if (Latency)
2812  dbgs() << " " << Latency << " cycles ";
2813  else
2814  dbgs() << " ";
2815  dbgs() << '\n';
2816 }
2817 #endif
2818 
2819 namespace llvm {
2820 /// Return true if this heuristic determines order.
 2821 /// TODO: Consider refactoring the return type of these functions to an integer
 2822 /// or enum, as we may need to differentiate whether TryCand is better than Cand.
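/// For example, tryLess(2, 5, TryCand, Cand, Stall) records Stall as
/// TryCand.Reason and returns true because TryCand stalls for fewer cycles;
/// equal values return false, leaving the decision to later heuristics.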
2823 bool tryLess(int TryVal, int CandVal,
2827  if (TryVal < CandVal) {
2828  TryCand.Reason = Reason;
2829  return true;
2830  }
2831  if (TryVal > CandVal) {
2832  if (Cand.Reason > Reason)
2833  Cand.Reason = Reason;
2834  return true;
2835  }
2836  return false;
2837 }
2838 
2839 bool tryGreater(int TryVal, int CandVal,
2843  if (TryVal > CandVal) {
2844  TryCand.Reason = Reason;
2845  return true;
2846  }
2847  if (TryVal < CandVal) {
2848  if (Cand.Reason > Reason)
2849  Cand.Reason = Reason;
2850  return true;
2851  }
2852  return false;
2853 }
2854 
2857  SchedBoundary &Zone) {
2858  if (Zone.isTop()) {
2859  // Prefer the candidate with the lesser depth, but only if one of them has
2860  // depth greater than the total latency scheduled so far, otherwise either
2861  // of them could be scheduled now with no stall.
2862  if (std::max(TryCand.SU->getDepth(), Cand.SU->getDepth()) >
2863  Zone.getScheduledLatency()) {
2864  if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2865  TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
2866  return true;
2867  }
2868  if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2869  TryCand, Cand, GenericSchedulerBase::TopPathReduce))
2870  return true;
2871  } else {
2872  // Prefer the candidate with the lesser height, but only if one of them has
2873  // height greater than the total latency scheduled so far, otherwise either
2874  // of them could be scheduled now with no stall.
2875  if (std::max(TryCand.SU->getHeight(), Cand.SU->getHeight()) >
2876  Zone.getScheduledLatency()) {
2877  if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
2878  TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
2879  return true;
2880  }
2881  if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
2882  TryCand, Cand, GenericSchedulerBase::BotPathReduce))
2883  return true;
2884  }
2885  return false;
2886 }
2887 } // end namespace llvm
2888 
2889 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
2890  LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
2891  << GenericSchedulerBase::getReasonStr(Reason) << '\n');
2892 }
2893 
2895  tracePick(Cand.Reason, Cand.AtTop);
2896 }
2897 
2899  assert(dag->hasVRegLiveness() &&
2900  "(PreRA)GenericScheduler needs vreg liveness");
2901  DAG = static_cast<ScheduleDAGMILive*>(dag);
2902  SchedModel = DAG->getSchedModel();
2903  TRI = DAG->TRI;
2904 
2905  if (RegionPolicy.ComputeDFSResult)
2906  DAG->computeDFSResult();
2907 
2908  Rem.init(DAG, SchedModel);
2909  Top.init(DAG, SchedModel, &Rem);
2910  Bot.init(DAG, SchedModel, &Rem);
2911 
2912  // Initialize resource counts.
2913 
2914  // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
2915  // are disabled, then these HazardRecs will be disabled.
2917  if (!Top.HazardRec) {
2918  Top.HazardRec =
2920  Itin, DAG);
2921  }
2922  if (!Bot.HazardRec) {
2923  Bot.HazardRec =
2925  Itin, DAG);
2926  }
2927  TopCand.SU = nullptr;
2928  BotCand.SU = nullptr;
2929 }
2930 
2931 /// Initialize the per-region scheduling policy.
2934  unsigned NumRegionInstrs) {
2935  const MachineFunction &MF = *Begin->getMF();
2936  const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
2937 
2938  // Avoid setting up the register pressure tracker for small regions to save
2939  // compile time. As a rough heuristic, only track pressure when the number of
2940  // schedulable instructions exceeds half the integer register file.
2941  RegionPolicy.ShouldTrackPressure = true;
2942  for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
2943  MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
2944  if (TLI->isTypeLegal(LegalIntVT)) {
2945  unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
2946  TLI->getRegClassFor(LegalIntVT));
2947  RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
2948  }
2949  }
2950 
2951  // For generic targets, we default to bottom-up, because it's simpler and more
2952  // compile-time optimizations have been implemented in that direction.
2953  RegionPolicy.OnlyBottomUp = true;
2954 
2955  // Allow the subtarget to override default policy.
2956  MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
2957 
2958  // After subtarget overrides, apply command line options.
2959  if (!EnableRegPressure) {
2960  RegionPolicy.ShouldTrackPressure = false;
2961  RegionPolicy.ShouldTrackLaneMasks = false;
2962  }
2963 
 2964  // Check whether -misched-topdown/bottomup can force or unforce the scheduling direction.
2965  // e.g. -misched-bottomup=false allows scheduling in both directions.
2967  "-misched-topdown incompatible with -misched-bottomup");
2968  if (ForceBottomUp.getNumOccurrences() > 0) {
2969  RegionPolicy.OnlyBottomUp = ForceBottomUp;
2970  if (RegionPolicy.OnlyBottomUp)
2971  RegionPolicy.OnlyTopDown = false;
2972  }
2973  if (ForceTopDown.getNumOccurrences() > 0) {
2974  RegionPolicy.OnlyTopDown = ForceTopDown;
2975  if (RegionPolicy.OnlyTopDown)
2976  RegionPolicy.OnlyBottomUp = false;
2977  }
2978 }
2979 
2981  // Cannot completely remove virtual function even in release mode.
2982 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2983  dbgs() << "GenericScheduler RegionPolicy: "
2984  << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
2985  << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
2986  << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
2987  << "\n";
2988 #endif
2989 }
2990 
2991 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
2992 /// critical path by more cycles than it takes to drain the instruction buffer.
 2993 /// We estimate an upper bound on in-flight instructions as:
2994 ///
2995 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
2996 /// InFlightIterations = AcyclicPath / CyclesPerIteration
2997 /// InFlightResources = InFlightIterations * LoopResources
2998 ///
2999 /// TODO: Check execution resources in addition to IssueCount.
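/// A worked example of the estimate above, using hypothetical counts: if an
/// iteration is limited to 10 cycles, the acyclic critical path is 40 cycles,
/// and the loop issues 20 micro-ops per iteration, then InFlightIterations =
/// 40 / 10 = 4 and InFlightResources = 4 * 20 = 80 micro-ops; the loop is
/// flagged as acyclic latency limited only if 80 exceeds the micro-op buffer
/// limit.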
3002  return;
3003 
3004  // Scaled number of cycles per loop iteration.
3005  unsigned IterCount =
3007  Rem.RemIssueCount);
3008  // Scaled acyclic critical path.
3009  unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
3010  // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
3011  unsigned InFlightCount =
3012  (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
3013  unsigned BufferLimit =
3015 
3016  Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
3017 
3018  LLVM_DEBUG(
3019  dbgs() << "IssueCycles="
3021  << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
3022  << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
3023  << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
3024  << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
3025  if (Rem.IsAcyclicLatencyLimited) dbgs() << " ACYCLIC LATENCY LIMIT\n");
3026 }
3027 
3029  Rem.CriticalPath = DAG->ExitSU.getDepth();
3030 
3031  // Some roots may not feed into ExitSU. Check all of them in case.
3032  for (const SUnit *SU : Bot.Available) {
3033  if (SU->getDepth() > Rem.CriticalPath)
3034  Rem.CriticalPath = SU->getDepth();
3035  }
3036  LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
3037  if (DumpCriticalPathLength) {
3038  errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
3039  }
3040 
3042  Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
3043  checkAcyclicLatency();
3044  }
3045 }
3046 
3047 namespace llvm {
3048 bool tryPressure(const PressureChange &TryP,
3049  const PressureChange &CandP,
3053  const TargetRegisterInfo *TRI,
3054  const MachineFunction &MF) {
3055  // If one candidate decreases and the other increases, go with it.
3056  // Invalid candidates have UnitInc==0.
3057  if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
3058  Reason)) {
3059  return true;
3060  }
3061  // Do not compare the magnitude of pressure changes between top and bottom
3062  // boundary.
3063  if (Cand.AtTop != TryCand.AtTop)
3064  return false;
3065 
3066  // If both candidates affect the same set in the same boundary, go with the
3067  // smallest increase.
3068  unsigned TryPSet = TryP.getPSetOrMax();
3069  unsigned CandPSet = CandP.getPSetOrMax();
3070  if (TryPSet == CandPSet) {
3071  return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
3072  Reason);
3073  }
3074 
3075  int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
3077 
3078  int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
3080 
3081  // If the candidates are decreasing pressure, reverse priority.
3082  if (TryP.getUnitInc() < 0)
3083  std::swap(TryRank, CandRank);
3084  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
3085 }
3086 
3087 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
3088  return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
3089 }
3090 
3091 /// Minimize physical register live ranges. Regalloc wants them adjacent to
3092 /// their physreg def/use.
3093 ///
3094 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
3095 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
3096 /// with the operation that produces or consumes the physreg. We'll do this when
3097 /// regalloc has support for parallel copies.
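/// For example, when scheduling bottom-up, a COPY that defines a physical
/// register is biased toward immediate scheduling (returns 1), while a COPY
/// that reads a physreg is deferred (returns -1) when it has no remaining
/// predecessors (i.e. it sits at the region boundary) and scheduled
/// immediately (returns 1) otherwise.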
3098 int biasPhysReg(const SUnit *SU, bool isTop) {
3099  const MachineInstr *MI = SU->getInstr();
3100 
3101  if (MI->isCopy()) {
3102  unsigned ScheduledOper = isTop ? 1 : 0;
3103  unsigned UnscheduledOper = isTop ? 0 : 1;
 3104  // If we have already scheduled the physreg producer/consumer, immediately
3105  // schedule the copy.
3106  if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
3107  return 1;
3108  // If the physreg is at the boundary, defer it. Otherwise schedule it
3109  // immediately to free the dependent. We can hoist the copy later.
3110  bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
3111  if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
3112  return AtBoundary ? -1 : 1;
3113  }
3114 
3115  if (MI->isMoveImmediate()) {
3116  // If we have a move immediate and all successors have been assigned, bias
3117  // towards scheduling this later. Make sure all register defs are to
3118  // physical registers.
3119  bool DoBias = true;
3120  for (const MachineOperand &Op : MI->defs()) {
3121  if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
3122  DoBias = false;
3123  break;
3124  }
3125  }
3126 
3127  if (DoBias)
3128  return isTop ? -1 : 1;
3129  }
3130 
3131  return 0;
3132 }
3133 } // end namespace llvm
3134 
3136  bool AtTop,
3137  const RegPressureTracker &RPTracker,
3138  RegPressureTracker &TempTracker) {
3139  Cand.SU = SU;
3140  Cand.AtTop = AtTop;
3141  if (DAG->isTrackingPressure()) {
3142  if (AtTop) {
3143  TempTracker.getMaxDownwardPressureDelta(
3144  Cand.SU->getInstr(),
3145  Cand.RPDelta,
3146  DAG->getRegionCriticalPSets(),
3147  DAG->getRegPressure().MaxSetPressure);
3148  } else {
3149  if (VerifyScheduling) {
3150  TempTracker.getMaxUpwardPressureDelta(
3151  Cand.SU->getInstr(),
3152  &DAG->getPressureDiff(Cand.SU),
3153  Cand.RPDelta,
3154  DAG->getRegionCriticalPSets(),
3155  DAG->getRegPressure().MaxSetPressure);
3156  } else {
3157  RPTracker.getUpwardPressureDelta(
3158  Cand.SU->getInstr(),
3159  DAG->getPressureDiff(Cand.SU),
3160  Cand.RPDelta,
3161  DAG->getRegionCriticalPSets(),
3162  DAG->getRegPressure().MaxSetPressure);
3163  }
3164  }
3165  }
3166  LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
3167  << " Try SU(" << Cand.SU->NodeNum << ") "
3168  << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
3169  << Cand.RPDelta.Excess.getUnitInc() << "\n");
3170 }
3171 
3172 /// Apply a set of heuristics to a new candidate. Heuristics are currently
3173 /// hierarchical. This may be more efficient than a graduated cost model because
3174 /// we don't need to evaluate all aspects of the model for each node in the
3175 /// queue. But it's really done to make the heuristics easier to debug and
3176 /// statistically analyze.
3177 ///
3178 /// \param Cand provides the policy and current best candidate.
3179 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3180 /// \param Zone describes the scheduled zone that we are extending, or nullptr
3181 /// if Cand is from a different zone than TryCand.
3182 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3184  SchedCandidate &TryCand,
3185  SchedBoundary *Zone) const {
3186  // Initialize the candidate if needed.
3187  if (!Cand.isValid()) {
3188  TryCand.Reason = NodeOrder;
3189  return true;
3190  }
3191 
 3192  // Bias PhysReg defs and copies toward their uses and defs, respectively.
3193  if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop),
3194  biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg))
3195  return TryCand.Reason != NoCand;
3196 
3197  // Avoid exceeding the target's limit.
3198  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
3199  Cand.RPDelta.Excess,
3200  TryCand, Cand, RegExcess, TRI,
3201  DAG->MF))
3202  return TryCand.Reason != NoCand;
3203 
3204  // Avoid increasing the max critical pressure in the scheduled region.
3205  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
3206  Cand.RPDelta.CriticalMax,
3207  TryCand, Cand, RegCritical, TRI,
3208  DAG->MF))
3209  return TryCand.Reason != NoCand;
3210 
3211  // We only compare a subset of features when comparing nodes between
 3212  // Top and Bottom boundary. Some properties are simply incomparable; in many
 3213  // other instances we should only override the other boundary if something
 3214  // is a clearly good pick on one boundary. Skip heuristics that are more
3215  // "tie-breaking" in nature.
3216  bool SameBoundary = Zone != nullptr;
3217  if (SameBoundary) {
3218  // For loops that are acyclic path limited, aggressively schedule for
 3219  // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
3220  // heuristics to take precedence.
3221  if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
3222  tryLatency(TryCand, Cand, *Zone))
3223  return TryCand.Reason != NoCand;
3224 
3225  // Prioritize instructions that read unbuffered resources by stall cycles.
3226  if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
3227  Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3228  return TryCand.Reason != NoCand;
3229  }
3230 
3231  // Keep clustered nodes together to encourage downstream peephole
3232  // optimizations which may reduce resource requirements.
3233  //
3234  // This is a best effort to set things up for a post-RA pass. Optimizations
3235  // like generating loads of multiple registers should ideally be done within
3236  // the scheduler pass by combining the loads during DAG postprocessing.
3237  const SUnit *CandNextClusterSU =
3238  Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3239  const SUnit *TryCandNextClusterSU =
3240  TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
3241  if (tryGreater(TryCand.SU == TryCandNextClusterSU,
3242  Cand.SU == CandNextClusterSU,
3243  TryCand, Cand, Cluster))
3244  return TryCand.Reason != NoCand;
3245 
3246  if (SameBoundary) {
3247  // Weak edges are for clustering and other constraints.
3248  if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
3249  getWeakLeft(Cand.SU, Cand.AtTop),
3250  TryCand, Cand, Weak))
3251  return TryCand.Reason != NoCand;
3252  }
3253 
3254  // Avoid increasing the max pressure of the entire region.
3255  if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
3256  Cand.RPDelta.CurrentMax,
3257  TryCand, Cand, RegMax, TRI,
3258  DAG->MF))
3259  return TryCand.Reason != NoCand;
3260 
3261  if (SameBoundary) {
3262  // Avoid critical resource consumption and balance the schedule.
3263  TryCand.initResourceDelta(DAG, SchedModel);
3265  TryCand, Cand, ResourceReduce))
3266  return TryCand.Reason != NoCand;
3269  TryCand, Cand, ResourceDemand))
3270  return TryCand.Reason != NoCand;
3271 
3272  // Avoid serializing long latency dependence chains.
3273  // For acyclic path limited loops, latency was already checked above.
3274  if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
3275  !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
3276  return TryCand.Reason != NoCand;
3277 
3278  // Fall through to original instruction order.
3279  if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
3280  || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
3281  TryCand.Reason = NodeOrder;
3282  return true;
3283  }
3284  }
3285 
3286  return false;
3287 }
3288 
3289 /// Pick the best candidate from the queue.
3290 ///
3291 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
3292 /// DAG building. To adjust for the current scheduling location we need to
3293 /// maintain the number of vreg uses remaining to be top-scheduled.
3295  const CandPolicy &ZonePolicy,
3296  const RegPressureTracker &RPTracker,
3297  SchedCandidate &Cand) {
3298  // getMaxPressureDelta temporarily modifies the tracker.
3299  RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
3300 
3301  ReadyQueue &Q = Zone.Available;
3302  for (SUnit *SU : Q) {
3303 
3304  SchedCandidate TryCand(ZonePolicy);
3305  initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
3306  // Pass SchedBoundary only when comparing nodes from the same boundary.
3307  SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
3308  if (tryCandidate(Cand, TryCand, ZoneArg)) {
3309  // Initialize resource delta if needed in case future heuristics query it.
3310  if (TryCand.ResDelta == SchedResourceDelta())
3311  TryCand.initResourceDelta(DAG, SchedModel);
3312  Cand.setBest(TryCand);
3313  LLVM_DEBUG(traceCandidate(Cand));
3314  }
3315  }
3316 }
3317 
3318 /// Pick the best candidate node from either the top or bottom queue.
3319 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
3320  // Schedule as far as possible in the direction of no choice. This is most
3321  // efficient, but also provides the best heuristics for CriticalPSets.
3322  if (SUnit *SU = Bot.pickOnlyChoice()) {
3323  IsTopNode = false;
3324  tracePick(Only1, false);
3325  return SU;
3326  }
3327  if (SUnit *SU = Top.pickOnlyChoice()) {
3328  IsTopNode = true;
3329  tracePick(Only1, true);
3330  return SU;
3331  }
3332  // Set the bottom-up policy based on the state of the current bottom zone and
3333  // the instructions outside the zone, including the top zone.
3334  CandPolicy BotPolicy;
3335  setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
3336  // Set the top-down policy based on the state of the current top zone and
3337  // the instructions outside the zone, including the bottom zone.
3338  CandPolicy TopPolicy;
3339  setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
3340 
3341  // See if BotCand is still valid (because we previously scheduled from Top).
3342  LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
3343  if (!BotCand.isValid() || BotCand.SU->isScheduled ||
3344  BotCand.Policy != BotPolicy) {
3345  BotCand.reset(CandPolicy());
3346  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
3347  assert(BotCand.Reason != NoCand && "failed to find the first candidate");
3348  } else {
3349  LLVM_DEBUG(traceCandidate(BotCand));
3350 #ifndef NDEBUG
3351  if (VerifyScheduling) {
3352  SchedCandidate TCand;
3353  TCand.reset(CandPolicy());
3354  pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
3355  assert(TCand.SU == BotCand.SU &&
3356  "Last pick result should correspond to re-picking right now");
3357  }
3358 #endif
3359  }
3360 
3361  // Check if the top Q has a better candidate.
3362  LLVM_DEBUG(dbgs() << "Picking from Top:\n");
3363  if (!TopCand.isValid() || TopCand.SU->isScheduled ||
3364  TopCand.Policy != TopPolicy) {
3365  TopCand.reset(CandPolicy());
3366  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
3367  assert(TopCand.Reason != NoCand && "failed to find the first candidate");
3368  } else {
3369  LLVM_DEBUG(traceCandidate(TopCand));
3370 #ifndef NDEBUG
3371  if (VerifyScheduling) {
3372  SchedCandidate TCand;
3373  TCand.reset(CandPolicy());
3374  pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
3375  assert(TCand.SU == TopCand.SU &&
3376  "Last pick result should correspond to re-picking right now");
3377  }
3378 #endif
3379  }
3380 
3381  // Pick best from BotCand and TopCand.
3382  assert(BotCand.isValid());
3383  assert(TopCand.isValid());
3384  SchedCandidate Cand = BotCand;
3385  TopCand.Reason = NoCand;
3386  if (tryCandidate(Cand, TopCand, nullptr)) {
3387  Cand.setBest(TopCand);
3388  LLVM_DEBUG(traceCandidate(Cand));
3389  }
3390 
3391  IsTopNode = Cand.AtTop;
3392  tracePick(Cand);
3393  return Cand.SU;
3394 }
3395 
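// Note on the convention used just above: when pickNodeBidirectional compares
// the top and bottom candidates it passes Zone == nullptr to tryCandidate, so
// only the boundary-independent heuristics (e.g. clustering and region max
// pressure) participate; the SameBoundary-only checks such as stall cycles,
// weak edges, latency, and source order are skipped for that comparison.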
3396 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
3397 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
3398  if (DAG->top() == DAG->bottom()) {
3399  assert(Top.Available.empty() && Top.Pending.empty() &&
3400  Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
3401  return nullptr;
3402  }
3403  SUnit *SU;
3404  do {
3405  if (RegionPolicy.OnlyTopDown) {
3406  SU = Top.pickOnlyChoice();
3407  if (!SU) {
3408  CandPolicy NoPolicy;
3409  TopCand.reset(NoPolicy);
3410  pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
3411  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3412  tracePick(TopCand);
3413  SU = TopCand.SU;
3414  }
3415  IsTopNode = true;
3416  } else if (RegionPolicy.OnlyBottomUp) {
3417  SU = Bot.pickOnlyChoice();
3418  if (!SU) {
3419  CandPolicy NoPolicy;
3420  BotCand.reset(NoPolicy);
3421  pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
3422  assert(BotCand.Reason != NoCand && "failed to find a candidate");
3423  tracePick(BotCand);
3424  SU = BotCand.SU;
3425  }
3426  IsTopNode = false;
3427  } else {
3428  SU = pickNodeBidirectional(IsTopNode);
3429  }
3430  } while (SU->isScheduled);
3431 
3432  if (SU->isTopReady())
3433  Top.removeReady(SU);
3434  if (SU->isBottomReady())
3435  Bot.removeReady(SU);
3436 
3437  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3438  << *SU->getInstr());
3439  return SU;
3440 }
3441 
3442 void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
3443  MachineBasicBlock::iterator InsertPos = SU->getInstr();
3444  if (!isTop)
3445  ++InsertPos;
3446  SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
3447 
3448  // Find already scheduled copies with a single physreg dependence and move
3449  // them just above the scheduled instruction.
3450  for (SDep &Dep : Deps) {
3451  if (Dep.getKind() != SDep::Data ||
3452  !Register::isPhysicalRegister(Dep.getReg()))
3453  continue;
3454  SUnit *DepSU = Dep.getSUnit();
3455  if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
3456  continue;
3457  MachineInstr *Copy = DepSU->getInstr();
3458  if (!Copy->isCopy() && !Copy->isMoveImmediate())
3459  continue;
3460  LLVM_DEBUG(dbgs() << " Rescheduling physreg copy ";
3461  DAG->dumpNode(*Dep.getSUnit()));
3462  DAG->moveInstruction(Copy, InsertPos);
3463  }
3464 }
3465 
3466 /// Update the scheduler's state after scheduling a node. This is the same node
3467 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
3468 /// update its state based on the current cycle before MachineSchedStrategy
3469 /// does.
3470 ///
3471 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
3472 /// them here. See comments in biasPhysReg.
3473 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3474  if (IsTopNode) {
3475  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3476  Top.bumpNode(SU);
3477  if (SU->hasPhysRegUses)
3478  reschedulePhysReg(SU, true);
3479  } else {
3480  SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
3481  Bot.bumpNode(SU);
3482  if (SU->hasPhysRegDefs)
3483  reschedulePhysReg(SU, false);
3484  }
3485 }
3486 
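// --- Illustrative sketch (simplified; not the exact driver code) ------------
// schedNode() above is invoked by the ScheduleDAGMI driver after it has
// committed the instruction chosen by pickNode(). Roughly, the main loop of
// the scheduler looks like the following (bookkeeping, limits, and debug
// output omitted, and the ordering of the helper calls simplified):
//
//   bool IsTopNode = false;
//   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
//     scheduleMI(SU, IsTopNode);           // move the MI, update pressure
//     SchedImpl->schedNode(SU, IsTopNode); // let the strategy advance cycles
//     updateQueues(SU, IsTopNode);         // release newly ready nodes
//   }
// ----------------------------------------------------------------------------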
3487 /// Create the standard converging machine scheduler. This will be used as the
3488 /// default scheduler if the target does not set a default.
3489 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
3490  ScheduleDAGMILive *DAG =
3491  new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
3492  // Register DAG post-processors.
3493  //
3494  // FIXME: extend the mutation API to allow earlier mutations to instantiate
3495  // data and pass it to later mutations. Have a single mutation that gathers
3496  // the interesting nodes in one pass.
3497  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
3498  return DAG;
3499 }
3500 
3502  return createGenericSchedLive(C);
3503 }
3504 
3505 static MachineSchedRegistry
3506 GenericSchedRegistry("converge", "Standard converging scheduler.",
3508 
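// --- Illustrative sketch (not part of this file) ----------------------------
// Out-of-tree or target code can register an additional scheduler factory in
// the same way as GenericSchedRegistry above; registered names are then
// selectable by the machine-scheduler pass (typically via the registry-backed
// -misched=<name> option). The factory and the "my-custom" name below are
// hypothetical.
static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
  // DAG mutations can be layered on, exactly as createGenericSchedLive does.
  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static MachineSchedRegistry
MyCustomSchedRegistry("my-custom", "Hypothetical example scheduler.",
                      createMyCustomSched);
// ----------------------------------------------------------------------------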
3509 //===----------------------------------------------------------------------===//
3510 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
3511 //===----------------------------------------------------------------------===//
3512 
3513 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
3514  DAG = Dag;
3515  SchedModel = DAG->getSchedModel();
3516  TRI = DAG->TRI;
3517 
3518  Rem.init(DAG, SchedModel);
3519  Top.init(DAG, SchedModel, &Rem);
3520  BotRoots.clear();
3521 
3522  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
3523  // or are disabled, then these HazardRecs will be disabled.
3524  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
3525  if (!Top.HazardRec) {
3526  Top.HazardRec =
3527  DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
3528  Itin, DAG);
3529  }
3530 }
3531 
3532 void PostGenericScheduler::registerRoots() {
3533  Rem.CriticalPath = DAG->ExitSU.getDepth();
3534 
3535  // Some roots may not feed into ExitSU. Check all of them in case.
3536  for (const SUnit *SU : BotRoots) {
3537  if (SU->getDepth() > Rem.CriticalPath)
3538  Rem.CriticalPath = SU->getDepth();
3539  }
3540  LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
3541  if (DumpCriticalPathLength) {
3542  errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
3543  }
3544 }
3545 
3546 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
3547 ///
3548 /// \param Cand provides the policy and current best candidate.
3549 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
3550 /// \return \c true if TryCand is better than Cand (Reason is NOT NoCand)
3551 bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
3552  SchedCandidate &TryCand) {
3553  // Initialize the candidate if needed.
3554  if (!Cand.isValid()) {
3555  TryCand.Reason = NodeOrder;
3556  return true;
3557  }
3558 
3559  // Prioritize instructions that read unbuffered resources by stall cycles.
3560  if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
3561  Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
3562  return TryCand.Reason != NoCand;
3563 
3564  // Keep clustered nodes together.
3565  if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
3566  Cand.SU == DAG->getNextClusterSucc(),
3567  TryCand, Cand, Cluster))
3568  return TryCand.Reason != NoCand;
3569 
3570  // Avoid critical resource consumption and balance the schedule.
3571  if (tryLess(TryCand.ResDelta.CriticalResources, Cand.ResDelta.CriticalResources,
3572  TryCand, Cand, ResourceReduce))
3573  return TryCand.Reason != NoCand;
3574  if (tryGreater(TryCand.ResDelta.DemandedResources,
3575  Cand.ResDelta.DemandedResources,
3576  TryCand, Cand, ResourceDemand))
3577  return TryCand.Reason != NoCand;
3578 
3579  // Avoid serializing long latency dependence chains.
3580  if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
3581  return TryCand.Reason != NoCand;
3582  }
3583 
3584  // Fall through to original instruction order.
3585  if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
3586  TryCand.Reason = NodeOrder;
3587  return true;
3588  }
3589 
3590  return false;
3591 }
3592 
3593 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
3594  ReadyQueue &Q = Top.Available;
3595  for (SUnit *SU : Q) {
3596  SchedCandidate TryCand(Cand.Policy);
3597  TryCand.SU = SU;
3598  TryCand.AtTop = true;
3599  TryCand.initResourceDelta(DAG, SchedModel);
3600  if (tryCandidate(Cand, TryCand)) {
3601  Cand.setBest(TryCand);
3602  LLVM_DEBUG(traceCandidate(Cand));
3603  }
3604  }
3605 }
3606 
3607 /// Pick the next node to schedule.
3608 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
3609  if (DAG->top() == DAG->bottom()) {
3610  assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
3611  return nullptr;
3612  }
3613  SUnit *SU;
3614  do {
3615  SU = Top.pickOnlyChoice();
3616  if (SU) {
3617  tracePick(Only1, true);
3618  } else {
3619  CandPolicy NoPolicy;
3620  SchedCandidate TopCand(NoPolicy);
3621  // Set the top-down policy based on the state of the current top zone and
3622  // the instructions outside the zone, including the bottom zone.
3623  setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
3624  pickNodeFromQueue(TopCand);
3625  assert(TopCand.Reason != NoCand && "failed to find a candidate");
3626  tracePick(TopCand);
3627  SU = TopCand.SU;
3628  }
3629  } while (SU->isScheduled);
3630 
3631  IsTopNode = true;
3632  Top.removeReady(SU);
3633 
3634  LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
3635  << *SU->getInstr());
3636  return SU;
3637 }
3638 
3639 /// Called after ScheduleDAGMI has scheduled an instruction and updated
3640 /// scheduled/remaining flags in the DAG nodes.
3641 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
3642  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
3643  Top.bumpNode(SU);
3644 }
3645 
3646 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
3647  return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
3648  /*RemoveKillFlags=*/true);
3649 }
3650 
3651 //===----------------------------------------------------------------------===//
3652 // ILP Scheduler. Currently for experimental analysis of heuristics.
3653 //===----------------------------------------------------------------------===//
3654 
3655 namespace {
3656 
3657 /// Order nodes by the ILP metric.
3658 struct ILPOrder {
3659  const SchedDFSResult *DFSResult = nullptr;
3660  const BitVector *ScheduledTrees = nullptr;
3661  bool MaximizeILP;
3662 
3663  ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
3664 
3665  /// Apply a less-than relation on node priority.
3666  ///
3667  /// (Return true if A comes after B in the Q.)
3668  bool operator()(const SUnit *A, const SUnit *B) const {
3669  unsigned SchedTreeA = DFSResult->getSubtreeID(A);
3670  unsigned SchedTreeB = DFSResult->getSubtreeID(B);
3671  if (SchedTreeA != SchedTreeB) {
3672  // Unscheduled trees have lower priority.
3673  if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
3674  return ScheduledTrees->test(SchedTreeB);
3675 
3676  // Trees with shallower connections have lower priority.
3677  if (DFSResult->getSubtreeLevel(SchedTreeA)
3678  != DFSResult->getSubtreeLevel(SchedTreeB)) {
3679  return DFSResult->getSubtreeLevel(SchedTreeA)
3680  < DFSResult->getSubtreeLevel(SchedTreeB);
3681  }
3682  }
3683  if (MaximizeILP)
3684  return DFSResult->getILP(A) < DFSResult->getILP(B);
3685  else
3686  return DFSResult->getILP(A) > DFSResult->getILP(B);
3687  }
3688 };
3689 
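// Worked example (illustrative): with MaximizeILP == true and two nodes A and
// B from the same subtree, where B has the higher ILP value, the comparator
// returns true for (A, B), i.e. A sorts after B. Because ILPScheduler below
// keeps ReadyQ as a heap (std::push_heap/std::pop_heap) ordered by this
// comparator, the higher-ILP node B is popped and scheduled first.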
3690 /// Schedule based on the ILP metric.
3691 class ILPScheduler : public MachineSchedStrategy {
3692  ScheduleDAGMILive *DAG = nullptr;
3693  ILPOrder Cmp;
3694 
3695  std::vector<SUnit*> ReadyQ;
3696 
3697 public:
3698  ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
3699 
3700  void initialize(ScheduleDAGMI *dag) override {
3701  assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
3702  DAG = static_cast<ScheduleDAGMILive*>(dag);
3703  DAG->computeDFSResult();
3704  Cmp.DFSResult = DAG->getDFSResult();
3705  Cmp.ScheduledTrees = &DAG->getScheduledTrees();
3706  ReadyQ.clear();
3707  }
3708 
3709  void registerRoots() override {
3710  // Restore the heap in ReadyQ with the updated DFS results.
3711  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3712  }
3713 
3714  /// Implement MachineSchedStrategy interface.
3715  /// -----------------------------------------
3716 
3717  /// Callback to select the highest priority node from the ready Q.
3718  SUnit *pickNode(bool &IsTopNode) override {
3719  if (ReadyQ.empty()) return nullptr;
3720  std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3721  SUnit *SU = ReadyQ.back();
3722  ReadyQ.pop_back();
3723  IsTopNode = false;
3724  LLVM_DEBUG(dbgs() << "Pick node "
3725  << "SU(" << SU->NodeNum << ") "
3726  << " ILP: " << DAG->getDFSResult()->getILP(SU)
3727  << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
3728  << " @"
3729  << DAG->getDFSResult()->getSubtreeLevel(
3730  DAG->getDFSResult()->getSubtreeID(SU))
3731  << '\n'
3732  << "Scheduling " << *SU->getInstr());
3733  return SU;
3734  }
3735 
3736  /// Scheduler callback to notify that a new subtree is scheduled.
3737  void scheduleTree(unsigned SubtreeID) override {
3738  std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3739  }
3740 
3741  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
3742  /// DFSResults, and resort the priority Q.
3743  void schedNode(SUnit *SU, bool IsTopNode) override {
3744  assert(!IsTopNode && "SchedDFSResult needs bottom-up");
3745  }
3746 
3747  void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
3748 
3749  void releaseBottomNode(SUnit *SU) override {
3750  ReadyQ.push_back(SU);
3751  std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
3752  }
3753 };
3754 
3755 } // end anonymous namespace
3756 
3757 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
3758  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
3759 }
3760 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
3761  return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
3762 }
3763 
3764 static MachineSchedRegistry ILPMaxRegistry(
3765  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
3766 static MachineSchedRegistry ILPMinRegistry(
3767  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
3768 
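// Usage sketch (assumes the registry-backed -misched=<name> option and the
// -enable-misched flag wired up elsewhere in this pass; the exact invocation
// may differ per tool):
//   llc -enable-misched -misched=ilpmax input.ll -o out.s   // maximize ILP
//   llc -enable-misched -misched=ilpmin input.ll -o out.s   // minimize ILP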
3769 //===----------------------------------------------------------------------===//
3770 // Machine Instruction Shuffler for Correctness Testing
3771 //===----------------------------------------------------------------------===//
3772 
3773 #ifndef NDEBUG
3774 namespace {
3775 
3776 /// Apply a less-than relation on the node order, which corresponds to the
3777 /// instruction order prior to scheduling. IsReverse implements greater-than.
3778 template<bool IsReverse>
3779 struct SUnitOrder {
3780  bool operator()(SUnit *A, SUnit *B) const {
3781  if (IsReverse)
3782  return A->NodeNum > B->NodeNum;
3783  else
3784  return A->NodeNum < B->NodeNum;
3785  }
3786 };
3787 
3788 /// Reorder instructions as much as possible.
3789 class InstructionShuffler : public MachineSchedStrategy {
3790  bool IsAlternating;
3791  bool IsTopDown;
3792 
3793  // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
3794  // gives nodes with a higher number higher priority, causing the latest
3795  // instructions to be scheduled first.
3796  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
3797  TopQ;
3798 
3799  // When scheduling bottom-up, use greater-than as the queue priority.
3800  PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
3801  BottomQ;
3802 
3803 public:
3804  InstructionShuffler(bool alternate, bool topdown)
3805  : IsAlternating(alternate), IsTopDown(topdown) {}
3806 
3807  void initialize(ScheduleDAGMI*) override {
3808  TopQ.clear();
3809  BottomQ.clear();
3810  }
3811 
3812  /// Implement MachineSchedStrategy interface.
3813  /// -----------------------------------------
3814 
3815  SUnit *pickNode(bool &IsTopNode) override {
3816  SUnit *SU;
3817  if (IsTopDown) {
3818  do {
3819  if (TopQ.empty()) return nullptr;
3820  SU = TopQ.top();
3821  TopQ.pop();
3822  } while (SU->isScheduled);
3823  IsTopNode = true;
3824  } else {
3825  do {
3826  if (BottomQ.empty()) return nullptr;
3827  SU = BottomQ.top();
3828  BottomQ.pop();
3829  } while (SU->isScheduled);
3830  IsTopNode = false;
3831  }
3832  if (IsAlternating)
3833  IsTopDown = !IsTopDown;
3834  return SU;
3835  }
3836 
3837  void schedNode(SUnit *SU, bool IsTopNode) override {}
3838 
3839  void releaseTopNode(SUnit *SU) override {
3840  TopQ.push(SU);
3841  }
3842  void releaseBottomNode(SUnit *SU) override {
3843  BottomQ.push(SU);
3844  }
3845 };
3846 
3847 } // end anonymous namespace
3848 
3849 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
3850  bool Alternate = !ForceTopDown && !ForceBottomUp;
3851  bool TopDown = !ForceBottomUp;
3852  assert((TopDown || !ForceTopDown) &&
3853  "-misched-topdown incompatible with -misched-bottomup");
3854  return new ScheduleDAGMILive(
3855  C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
3856 }
3857 
3858 static MachineSchedRegistry ShufflerRegistry(
3859  "shuffle", "Shuffle machine instructions alternating directions",
3860  createInstructionShuffler);
3861 #endif // !NDEBUG
3862 
3863 //===----------------------------------------------------------------------===//
3864 // GraphWriter support for ScheduleDAGMILive.
3865 //===----------------------------------------------------------------------===//
3866 
3867 #ifndef NDEBUG
3868 namespace llvm {
3869 
3870 template<> struct GraphTraits<
3871  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
3872 
3873 template<>
3874 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
3875  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
3876 
3877  static std::string getGraphName(const ScheduleDAG *G) {
3878  return std::string(G->MF.getName());
3879  }
3880 
3881  static bool renderGraphFromBottomUp() {
3882  return true;
3883  }
3884 
3885  static bool isNodeHidden(const SUnit *Node, const ScheduleDAG *G) {
3886  if (ViewMISchedCutoff == 0)
3887  return false;
3888  return (Node->Preds.size() > ViewMISchedCutoff
3889  || Node->Succs.size() > ViewMISchedCutoff);
3890  }
3891 
3892  /// If you want to override the dot attributes printed for a particular
3893  /// edge, override this method.
3894  static std::string getEdgeAttributes(const SUnit *Node,
3895  SUnitIterator EI,
3896  const ScheduleDAG *Graph) {
3897  if (EI.isArtificialDep())
3898  return "color=cyan,style=dashed";
3899  if (EI.isCtrlDep())
3900  return "color=blue,style=dashed";
3901  return "";
3902  }
3903 
3904  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
3905  std::string Str;
3906  raw_string_ostream SS(Str);
3907  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3908  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3909  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3910  SS << "SU:" << SU->NodeNum;
3911  if (DFS)
3912  SS << " I:" << DFS->getNumInstrs(SU);
3913  return SS.str();
3914  }
3915 
3916  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
3917  return G->getGraphNodeLabel(SU);
3918  }
3919 
3920  static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
3921  std::string Str("shape=Mrecord");
3922  const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
3923  const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
3924  static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
3925  if (DFS) {
3926  Str += ",style=filled,fillcolor=\"#";
3927  Str += DOT::getColorString(DFS->getSubtreeID(N));
3928  Str += '"';
3929  }
3930  return Str;
3931  }
3932 };
3933 
3934 } // end namespace llvm
3935 #endif // NDEBUG
3936 
3937 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
3938 /// rendered using 'dot'.
3939 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
3940 #ifndef NDEBUG
3941  ViewGraph(this, Name, false, Title);
3942 #else
3943  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
3944  << "systems with Graphviz or gv!\n";
3945 #endif // NDEBUG
3946 }
3947 
3948 /// Out-of-line implementation with no arguments is handy for gdb.
3949 void ScheduleDAGMI::viewGraph() {
3950  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
3951 }
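// Usage sketch: the zero-argument overload above is intended for interactive
// debugging. From gdb, with a ScheduleDAGMI pointer named DAG in scope and a
// debug build where 'dot'/'gv' are installed, something like:
//   (gdb) call DAG->viewGraph()
// pops up the rendered scheduling DAG for the current region.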
llvm::ScheduleDAGMILive::updateScheduledPressure
void updateScheduledPressure(const SUnit *SU, const std::vector< unsigned > &NewMaxPressure)
Definition: MachineScheduler.cpp:1087
llvm::ScheduleDAGInstrs::FirstDbgValue
MachineInstr * FirstDbgValue
Definition: ScheduleDAGInstrs.h:249
llvm::LiveRange::find
iterator find(SlotIndex Pos)
find - Return an iterator pointing to the first segment that ends after Pos, or end().
Definition: LiveInterval.cpp:350
ForceFastCluster
static cl::opt< bool > ForceFastCluster("force-fast-cluster", cl::Hidden, cl::desc("Switch to fast cluster algorithm with the lost " "of some fusion opportunities"), cl::init(false))
i
i
Definition: README.txt:29
llvm::ScheduleDAGMI::updateQueues
void updateQueues(SUnit *SU, bool IsTopNode)
Update scheduler DAG and queues after scheduling an instruction.
Definition: MachineScheduler.cpp:897
llvm::PressureChange::getPSetOrMax
unsigned getPSetOrMax() const
Definition: RegisterPressure.h:120
llvm::SparseMultiSet::setUniverse
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
Definition: SparseMultiSet.h:203
llvm::PriorityQueue::clear
void clear()
clear - Erase all elements from the queue.
Definition: PriorityQueue.h:76
llvm::GenericSchedulerBase::SchedResourceDelta::DemandedResources
unsigned DemandedResources
Definition: MachineScheduler.h:846
llvm::TargetSchedModel::mustEndGroup
bool mustEndGroup(const MachineInstr *MI, const MCSchedClassDesc *SC=nullptr) const
Return true if current group must end.
Definition: TargetSchedule.cpp:95
llvm::GenericSchedulerBase::Weak
@ Weak
Definition: MachineScheduler.h:814
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::GenericSchedulerBase::CandReason
CandReason
Represent the type of SchedCandidate found within a single queue.
Definition: MachineScheduler.h:813
DefaultSchedRegistry
static MachineSchedRegistry DefaultSchedRegistry("default", "Use the target's default scheduler choice.", useDefaultMachineSched)
isSchedBoundary
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII)
Return true of the given instruction should not be included in a scheduling region.
Definition: MachineScheduler.cpp:462
llvm::GenericSchedulerBase::TRI
const TargetRegisterInfo * TRI
Definition: MachineScheduler.h:910
llvm::SchedRemainder::reset
void reset()
Definition: MachineScheduler.h:595
llvm::MachineSchedContext::~MachineSchedContext
virtual ~MachineSchedContext()
Definition: MachineScheduler.cpp:161
llvm::TargetSchedModel::getWriteProcResBegin
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:133
llvm::HexagonInstrInfo::isSchedulingBoundary
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
Test if the given instruction should be considered a scheduling boundary.
Definition: HexagonInstrInfo.cpp:1783
llvm::SchedRemainder::RemIssueCount
unsigned RemIssueCount
Definition: MachineScheduler.h:586
MISchedCutoff
static cl::opt< unsigned > MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U))
llvm::MCProcResourceDesc::BufferSize
int BufferSize
Definition: MCSchedule.h:48
llvm::ScheduleDAGMILive::~ScheduleDAGMILive
~ScheduleDAGMILive() override
Definition: MachineScheduler.cpp:944
llvm::RegisterOperands::detectDeadDefs
void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS)
Use liveness information to find dead defs not marked with a dead flag and move them to the DeadDefs ...
Definition: RegisterPressure.cpp:581
ScheduleDAG.h
llvm::GenericSchedulerBase::SchedCandidate::setBest
void setBest(SchedCandidate &Best)
Definition: MachineScheduler.h:894
llvm::ScheduleDAGMI::viewGraph
void viewGraph() override
Out-of-line implementation with no arguments is handy for gdb.
Definition: MachineScheduler.cpp:3949
llvm::SchedDFSResult::scheduleTree
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
Definition: ScheduleDAGInstrs.cpp:1488
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:494
llvm::ReadyQueue::dump
void dump() const
Definition: MachineScheduler.cpp:618
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::GenericSchedulerBase::NodeOrder
@ NodeOrder
Definition: MachineScheduler.h:816
llvm::SchedBoundary::HazardRec
ScheduleHazardRecognizer * HazardRec
Definition: MachineScheduler.h:625
llvm::ScheduleDAGMILive::scheduleMI
void scheduleMI(SUnit *SU, bool IsTopNode)
Move an instruction and update register pressure.
Definition: MachineScheduler.cpp:1406
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
llvm::SUnit::NumPredsLeft
unsigned NumPredsLeft
Definition: ScheduleDAG.h:268
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
TargetFrameLowering.h
llvm::RegPressureTracker::init
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
Definition: RegisterPressure.cpp:263
llvm::TargetFrameLowering
Information about stack frame layout on the target.
Definition: TargetFrameLowering.h:43
llvm::ReadyQueue::isInQueue
bool isInQueue(SUnit *SU) const
Definition: MachineScheduler.h:545
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::ReadyQueue::find
iterator find(SUnit *SU)
Definition: MachineScheduler.h:561
llvm::printVRegOrUnit
Printable printVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI)
Create Printable object to print virtual registers and physical registers on a raw_ostream.
Definition: TargetRegisterInfo.cpp:165
llvm::DOTGraphTraits< ScheduleDAGMI * >::getGraphName
static std::string getGraphName(const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3877
llvm::GenericSchedulerBase::Cluster
@ Cluster
Definition: MachineScheduler.h:814
llvm::Latency
@ Latency
Definition: SIMachineScheduler.h:34
createILPMinScheduler
static ScheduleDAGInstrs * createILPMinScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3760
ILPMinRegistry
static MachineSchedRegistry ILPMinRegistry("ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler)
llvm::MachineSchedContext::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:133
MachinePassRegistry.h
llvm::SchedBoundary::getCriticalCount
unsigned getCriticalCount() const
Get the scaled count of scheduled micro-ops and resources, including executed resources.
Definition: MachineScheduler.h:733
llvm::MachineSchedStrategy::initialize
virtual void initialize(ScheduleDAGMI *DAG)=0
Initialize the strategy after building the DAG for a new region.
llvm::SchedBoundary::DAG
ScheduleDAGMI * DAG
Definition: MachineScheduler.h:618
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:720
llvm::ScheduleDAGInstrs::begin
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
Definition: ScheduleDAGInstrs.h:277
llvm::SparseMultiSet::find
iterator find(const KeyT &Key)
Find an element by its key.
Definition: SparseMultiSet.h:376
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::RegisterOperands
List of registers defined and used by a machine instruction.
Definition: RegisterPressure.h:166
llvm::TargetSubtargetInfo::getInstrInfo
virtual const TargetInstrInfo * getInstrInfo() const
Definition: TargetSubtargetInfo.h:93
llvm::BitVector::clear
void clear()
clear - Removes all bits from the bitvector.
Definition: BitVector.h:328
llvm::GenericSchedulerBase::Stall
@ Stall
Definition: MachineScheduler.h:814
llvm::TargetSchedModel::getResourceFactor
unsigned getResourceFactor(unsigned ResIdx) const
Multiply the number of units consumed for a resource by this factor to normalize it relative to other...
Definition: TargetSchedule.h:143
ScheduleDAGInstrs.h
llvm::X86AS::SS
@ SS
Definition: X86.h:189
llvm::PressureDiff
List of PressureChanges in order of increasing, unique PSetID.
Definition: RegisterPressure.h:140
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::GenericSchedulerBase::SchedCandidate::reset
void reset(const CandPolicy &NewPolicy)
Definition: MachineScheduler.h:882
llvm::MachineInstr::mayLoad
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:1013
llvm::SchedRemainder::init
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel)
Definition: MachineScheduler.cpp:2020
llvm::SchedBoundary::getNextResourceCycle
std::pair< unsigned, unsigned > getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles)
Compute the next cycle at which the given processor resource can be scheduled.
Definition: MachineScheduler.cpp:2102
llvm::MachineSchedRegistry
MachineSchedRegistry provides a selection of available machine instruction schedulers.
Definition: MachineScheduler.h:141
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::SchedBoundary::Pending
ReadyQueue Pending
Definition: MachineScheduler.h:623
llvm::printMBBReference
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Definition: MachineBasicBlock.cpp:116
llvm::GenericSchedulerBase::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:909
llvm::ForceTopDown
cl::opt< bool > ForceTopDown
llvm::ReadyQueue::begin
iterator begin()
Definition: MachineScheduler.h:555
llvm::LiveIntervals::getInstructionFromIndex
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
Definition: LiveIntervals.h:231
llvm::SparseMultiSet::clear
void clear()
Clears the set.
Definition: SparseMultiSet.h:343
ErrorHandling.h
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::tryGreater
bool tryGreater(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Definition: MachineScheduler.cpp:2839
llvm::ScheduleDAGMI::hasVRegLiveness
virtual bool hasVRegLiveness() const
Return true if this DAG supports VReg liveness and RegPressure.
Definition: MachineScheduler.h:316
llvm::GenericSchedulerBase::SchedCandidate::RPDelta
RegPressureDelta RPDelta
Definition: MachineScheduler.h:874
llvm::SUnit::NumSuccsLeft
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
ReadyListLimit
static cl::opt< unsigned > ReadyListLimit("misched-limit", cl::Hidden, cl::desc("Limit ready list to N instructions"), cl::init(256))
Avoid quadratic complexity in unusually large basic blocks by limiting the size of the ready lists.
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::PostGenericScheduler::initialize
void initialize(ScheduleDAGMI *Dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:3513
InvalidCycle
static const unsigned InvalidCycle
Definition: MachineScheduler.cpp:1968
llvm::GenericSchedulerBase::RegCritical
@ RegCritical
Definition: MachineScheduler.h:814
llvm::SUnit::isCall
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
RegisterClassInfo.h
llvm::GenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Apply a set of heuristics to a new candidate.
Definition: MachineScheduler.cpp:3183
MachineBasicBlock.h
llvm::ScheduleDAGMILive::SUPressureDiffs
PressureDiffs SUPressureDiffs
Definition: MachineScheduler.h:407
priorNonDebug
static MachineBasicBlock::const_iterator priorNonDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator Beg)
Decrement this iterator until reaching the top or a non-debug instr.
Definition: MachineScheduler.cpp:299
llvm::DOTGraphTraits< ScheduleDAGMI * >::getNodeAttributes
static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G)
Definition: MachineScheduler.cpp:3920
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::ScheduleDAGMILive::initRegPressure
void initRegPressure()
Definition: MachineScheduler.cpp:1012
llvm::ScheduleDAGMI::releasePredecessors
void releasePredecessors(SUnit *SU)
releasePredecessors - Call releasePred on each of SU's predecessors.
Definition: MachineScheduler.cpp:704
llvm::VNInfo::def
SlotIndex def
The index of the defining instruction.
Definition: LiveInterval.h:61
llvm::MemOp
Definition: TargetLowering.h:111
llvm::ScheduleDAGInstrs::addEdge
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
Definition: ScheduleDAGInstrs.cpp:1195
llvm::ScheduleDAGMI::postprocessDAG
void postprocessDAG()
Apply each ScheduleDAGMutation step in order.
Definition: MachineScheduler.cpp:842
getSchedRegions
static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions, bool RegionsTopDown)
Definition: MachineScheduler.cpp:490
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
true
basic Basic Alias true
Definition: BasicAliasAnalysis.cpp:1909
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:357
tracePick
static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop)
Definition: MachineScheduler.cpp:2889
DenseMap.h
llvm::GenericSchedulerBase::setPolicy
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone)
Set the CandPolicy given a scheduling zone given the current resources and latencies inside and outsi...
Definition: MachineScheduler.cpp:2687
llvm::GenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3028
llvm::BitVector::resize
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:334
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::PressureDiffs::clear
void clear()
Definition: RegisterPressure.h:206
TargetInstrInfo.h
llvm::GenericScheduler::initialize
void initialize(ScheduleDAGMI *dag) override
Initialize the strategy after building the DAG for a new region.
Definition: MachineScheduler.cpp:2898
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
llvm::RegisterMaskPair
Definition: RegisterPressure.h:38
llvm::ScheduleDAGMI::moveInstruction
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos)
Change the position of an instruction within the basic block and update live ranges and region bounda...
Definition: MachineScheduler.cpp:735
llvm::ScheduleDAGInstrs::startBlock
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:174
llvm::initializePostMachineSchedulerPass
void initializePostMachineSchedulerPass(PassRegistry &)
llvm::MachineSchedRegistry::Registry
static MachinePassRegistry< ScheduleDAGCtor > Registry
Definition: MachineScheduler.h:150
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
llvm::GenericSchedulerBase::SchedCandidate::Reason
CandReason Reason
Definition: MachineScheduler.h:868
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1293
llvm::SchedBoundary::countResource
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, unsigned Cycles, unsigned ReadyCycle)
Add the given processor resource to this scheduled zone.
Definition: MachineScheduler.cpp:2344
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:765
llvm::ScheduleDAGMILive::getPressureDiff
PressureDiff & getPressureDiff(const SUnit *SU)
Definition: MachineScheduler.h:458
EnableMachineSched
static cl::opt< bool > EnableMachineSched("enable-misched", cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), cl::Hidden)
llvm::SUnit::getDepth
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::ScheduleDAGMILive::computeDFSResult
void computeDFSResult()
Compute a DFSResult after DAG building is complete, and before any queue comparisons.
Definition: MachineScheduler.cpp:1299
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::ViewGraph
void ViewGraph(const GraphType &G, const Twine &Name, bool ShortNames=false, const Twine &Title="", GraphProgram::Name Program=GraphProgram::DOT)
ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file, then cleanup.
Definition: GraphWriter.h:427
llvm::ScheduleDAGMI::CurrentBottom
MachineBasicBlock::iterator CurrentBottom
The bottom of the unscheduled zone.
Definition: MachineScheduler.h:284
llvm::GenericSchedulerBase::ResourceDemand
@ ResourceDemand
Definition: MachineScheduler.h:815
llvm::ScheduleDAGMI::checkSchedLimit
bool checkSchedLimit()
Definition: MachineScheduler.cpp:753
STLExtras.h
llvm::RegPressureTracker::closeRegion
void closeRegion()
Finalize the region boundaries and recored live ins and live outs.
Definition: RegisterPressure.cpp:343
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::GenericScheduler::initCandidate
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, RegPressureTracker &TempTracker)
Definition: MachineScheduler.cpp:3135
llvm::ScheduleDAGMILive::TopRPTracker
RegPressureTracker TopRPTracker
Definition: MachineScheduler.h:422
llvm::MachineSchedStrategy::releaseTopNode
virtual void releaseTopNode(SUnit *SU)=0
When all predecessor dependencies have been resolved, free this node for top-down scheduling.
llvm::ScheduleDAGInstrs::SchedModel
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
Definition: ScheduleDAGInstrs.h:125
nextIfDebug
static MachineBasicBlock::const_iterator nextIfDebug(MachineBasicBlock::const_iterator I, MachineBasicBlock::const_iterator End)
If this iterator is a debug value, increment until reaching the End or a non-debug instruction.
Definition: MachineScheduler.cpp:320
llvm::LiveQueryResult
Result of a LiveRange query.
Definition: LiveInterval.h:90
llvm::ScheduleDAGMI::releaseSucc
void releaseSucc(SUnit *SU, SDep *SuccEdge)
ReleaseSucc - Decrement the NumPredsLeft count of a successor.
Definition: MachineScheduler.cpp:639
llvm::TargetSchedModel::getInstrItineraries
const InstrItineraryData * getInstrItineraries() const
Definition: TargetSchedule.h:82
llvm::MachineSchedStrategy::schedNode
virtual void schedNode(SUnit *SU, bool IsTopNode)=0
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
llvm::DOTGraphTraits< ScheduleDAGMI * >::getEdgeAttributes
static std::string getEdgeAttributes(const SUnit *Node, SUnitIterator EI, const ScheduleDAG *Graph)
If you want to override the dot attributes printed for a particular edge, override this method.
Definition: MachineScheduler.cpp:3894
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1618
llvm::SUnit::isTopReady
bool isTopReady() const
Definition: ScheduleDAG.h:446
llvm::SchedDFSResult::getSubtreeID
unsigned getSubtreeID(const SUnit *SU) const
Get the ID of the subtree the given DAG node belongs to.
Definition: ScheduleDFS.h:169
llvm::ScheduleDAGMI::enterRegion
void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs) override
Implement the ScheduleDAGInstrs interface for handling the next scheduling region.
Definition: MachineScheduler.cpp:723
llvm::ScheduleDAGInstrs::end
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
Definition: ScheduleDAGInstrs.h:280
llvm::SchedBoundary::Rem
SchedRemainder * Rem
Definition: MachineScheduler.h:620
llvm::ScheduleDAGMI::releaseSuccessors
void releaseSuccessors(SUnit *SU)
releaseSuccessors - Call releaseSucc on each of SU's successors.
Definition: MachineScheduler.cpp:667
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
RegisterPressure.h
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachineScheduler.cpp:74
llvm::LiveIntervals::getInstructionIndex
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
Definition: LiveIntervals.h:226
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::ScheduleDAGMILive::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and setup scheduler queues.
Definition: MachineScheduler.cpp:1396
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
llvm::SchedDFSResult::clear
void clear()
Clear the results.
Definition: ScheduleDFS.h:128
llvm::TargetSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
Definition: TargetSchedule.h:117
MachineRegisterInfo.h
llvm::ReadyQueue::remove
iterator remove(iterator I)
Definition: MachineScheduler.h:568
llvm::ScheduleDAGMI::Mutations
std::vector< std::unique_ptr< ScheduleDAGMutation > > Mutations
Ordered list of DAG postprocessing steps.
Definition: MachineScheduler.h:278
llvm::LiveIntervals::handleMove
void handleMove(MachineInstr &MI, bool UpdateFlags=false)
Call this method to notify LiveIntervals that instruction MI has been moved within a basic block.
Definition: LiveIntervals.cpp:1512
llvm::LiveRange::beginIndex
SlotIndex beginIndex() const
beginIndex - Return the lowest numbered slot covered.
Definition: LiveInterval.h:385
llvm::ScheduleDAGMILive::RegionCriticalPSets
std::vector< PressureChange > RegionCriticalPSets
List of pressure sets that exceed the target's pressure limit before scheduling, listed in increasing...
Definition: MachineScheduler.h:418
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
MachineValueType.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::GenericScheduler::dumpPolicy
void dumpPolicy() const override
Definition: MachineScheduler.cpp:2980
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::SchedRemainder::CriticalPath
unsigned CriticalPath
Definition: MachineScheduler.h:582
llvm::tgtok::Dag
@ Dag
Definition: TGLexer.h:50
llvm::ScheduleDAGMILive::dump
void dump() const override
Definition: MachineScheduler.cpp:1183
CommandLine.h
llvm::SUnitIterator::isCtrlDep
bool isCtrlDep() const
Tests if this is not an SDep::Data dependence.
Definition: ScheduleDAG.h:657
llvm::RegPressureTracker::setPos
void setPos(MachineBasicBlock::const_iterator Pos)
Definition: RegisterPressure.h:419
TargetLowering.h
llvm::GenericScheduler::reschedulePhysReg
void reschedulePhysReg(SUnit *SU, bool isTop)
Definition: MachineScheduler.cpp:3442
MinSubtreeSize
static const unsigned MinSubtreeSize
Definition: MachineScheduler.cpp:146
llvm::SchedBoundary::isTop
bool isTop() const
Definition: MachineScheduler.h:703
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::MachineSchedStrategy
MachineSchedStrategy - Interface to the scheduling algorithm used by ScheduleDAGMI.
Definition: MachineScheduler.h:206
llvm::SUnit::BotReadyCycle
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
llvm::SDep::Weak
@ Weak
Arbitrary weak DAG edge.
Definition: ScheduleDAG.h:73
ScheduleHazardRecognizer.h
llvm::DOT::getColorString
StringRef getColorString(unsigned NodeNumber)
Get a color string for this node number.
Definition: GraphWriter.cpp:90
checkResourceLimit
static bool checkResourceLimit(unsigned LFactor, unsigned Count, unsigned Latency, bool AfterSchedNode)
Given a Count of resource usage and a Latency value, return true if a SchedBoundary becomes resource ...
Definition: MachineScheduler.cpp:1976
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1280
llvm::TargetSchedModel::getIssueWidth
unsigned getIssueWidth() const
Maximum number of micro-ops that may be scheduled per cycle.
Definition: TargetSchedule.h:98
MachineLoopInfo.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PPCISD::SC
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
Definition: PPCISelLowering.h:418
llvm::SUnit::isBottomReady
bool isBottomReady() const
Definition: ScheduleDAG.h:449
llvm::SUnit::isUnbuffered
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
llvm::LiveRange::begin
iterator begin()
Definition: LiveInterval.h:215
llvm::SchedBoundary::~SchedBoundary
~SchedBoundary()
Definition: MachineScheduler.cpp:1970
llvm::RegPressureDelta::CurrentMax
PressureChange CurrentMax
Definition: RegisterPressure.h:241
EnableRegPressure
static cl::opt< bool > EnableRegPressure("misched-regpressure", cl::Hidden, cl::desc("Enable register pressure scheduling."), cl::init(true))
llvm::SchedBoundary::getExecutedCount
unsigned getExecutedCount() const
Get a scaled count for the minimum execution time of the scheduled micro-ops that are ready to execut...
Definition: MachineScheduler.h:742
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::RegPressureTracker::getUpwardPressureDelta
void getUpwardPressureDelta(const MachineInstr *MI, PressureDiff &PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit) const
This is the fast version of querying register pressure that does not directly depend on current liven...
Definition: RegisterPressure.cpp:1163
llvm::DstOp::getReg
Register getReg() const
Definition: MachineIRBuilder.h:106
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::ScheduleDAGMI::NextClusterPred
const SUnit * NextClusterPred
Record the next node in a scheduled cluster.
Definition: MachineScheduler.h:287
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::tryLatency
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, SchedBoundary &Zone)
Definition: MachineScheduler.cpp:2855
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3362
llvm::GenericSchedulerBase::Only1
@ Only1
Definition: MachineScheduler.h:814
llvm::SchedRemainder::IsAcyclicLatencyLimited
bool IsAcyclicLatencyLimited
Definition: MachineScheduler.h:588
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:909
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::MachineSchedStrategy::releaseBottomNode
virtual void releaseBottomNode(SUnit *SU)=0
When all successor dependencies have been resolved, free this node for bottom-up scheduling.
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::ScheduleDAGMI::findRootsAndBiasEdges
void findRootsAndBiasEdges(SmallVectorImpl< SUnit * > &TopRoots, SmallVectorImpl< SUnit * > &BotRoots)
Definition: MachineScheduler.cpp:848
SchedOnlyBlock
static cl::opt< unsigned > SchedOnlyBlock("misched-only-block", cl::Hidden, cl::desc("Only schedule this MBB#"))
llvm::createCopyConstrainDAGMutation
std::unique_ptr< ScheduleDAGMutation > createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
Definition: MachineScheduler.cpp:1794
llvm::SchedDFSResult::getSubtreeLevel
unsigned getSubtreeLevel(unsigned SubtreeID) const
Get the connection level of a subtree.
Definition: ScheduleDFS.h:180
llvm::GenericSchedulerBase::RegMax
@ RegMax
Definition: MachineScheduler.h:815
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
llvm::RegPressureTracker::getLiveThru
ArrayRef< unsigned > getLiveThru() const
Definition: RegisterPressure.h:454
false
Definition: StackSlotColoring.cpp:141
llvm::SchedBoundary::reset
void reset()
Definition: MachineScheduler.cpp:1985
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::ScheduleDAGMILive::updatePressureDiffs
void updatePressureDiffs(ArrayRef< RegisterMaskPair > LiveUses)
Update the PressureDiff array for liveness after scheduling this instruction.
Definition: MachineScheduler.cpp:1115
llvm::createGenericSchedPostRA
ScheduleDAGMI * createGenericSchedPostRA(MachineSchedContext *C)
Create a generic scheduler with no vreg liveness or DAG mutation passes.
Definition: MachineScheduler.cpp:3646
llvm::ScheduleDAGMILive::getScheduledTrees
BitVector & getScheduledTrees()
Definition: MachineScheduler.h:472
llvm::GenericSchedulerBase::CandPolicy
Policy for scheduling the next instruction in the candidate's zone.
Definition: MachineScheduler.h:823
llvm::Instruction
Definition: Instruction.h:42
ShufflerRegistry
static MachineSchedRegistry ShufflerRegistry("shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler)
llvm::ScheduleDAGMI::addMutation
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
Definition: MachineScheduler.h:323
llvm::SchedBoundary::getCurrCycle
unsigned getCurrCycle() const
Number of cycles to issue the instructions scheduled in this zone.
Definition: MachineScheduler.h:708
createILPMaxScheduler
static ScheduleDAGInstrs * createILPMaxScheduler(MachineSchedContext *C)
Definition: MachineScheduler.cpp:3757
llvm::RegisterPressure::LiveOutRegs
SmallVector< RegisterMaskPair, 8 > LiveOutRegs
Definition: RegisterPressure.h:53
llvm::RegPressureTracker::initLiveThru
void initLiveThru(const RegPressureTracker &RPTracker)
Initialize the LiveThru pressure set based on the untied defs found in RPTracker.
Definition: RegisterPressure.cpp:359
llvm::SchedRemainder::CyclicCritPath
unsigned CyclicCritPath
Definition: MachineScheduler.h:583
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::PrintLaneMask
Printable PrintLaneMask(LaneBitmask LaneMask)
Create a Printable object to print LaneBitmasks on a raw_ostream.
Definition: LaneBitmask.h:94
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition: CommandLine.h:395
BitVector.h
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:313
llvm::ScheduleDAGMI::startBlock
void startBlock(MachineBasicBlock *bb) override
Prepares to perform scheduling in the given block.
Definition: MachineScheduler.cpp:709
llvm::ArrayRef::rbegin
reverse_iterator rbegin() const
Definition: ArrayRef.h:155
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:239
llvm::ScheduleDAGInstrs::RegionEnd
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
Definition: ScheduleDAGInstrs.h:151
llvm::TargetSchedModel::getWriteProcResEnd
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: TargetSchedule.h:137
llvm::BitVector
Definition: BitVector.h:75
llvm::SchedDFSResult::getNumSubtrees
unsigned getNumSubtrees() const
The number of subtrees detected in this DAG.
Definition: ScheduleDFS.h:163
llvm::PostGenericScheduler::tryCandidate
virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand)
Apply a set of heuristics to a new candidate for PostRA scheduling.
Definition: MachineScheduler.cpp:3551
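A sketch of the extension point this virtual hook provides: a hypothetical post-RA strategy derived from PostGenericScheduler that inserts one extra tie-breaker before deferring to the generic heuristics. The class name and the height-based preference are illustrative assumptions, not an existing strategy.

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

class ExamplePostRAStrategy : public PostGenericScheduler {
public:
  ExamplePostRAStrategy(const MachineSchedContext *C)
      : PostGenericScheduler(C) {}

protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override {
    // Accept the first valid candidate, as the base implementation does.
    if (!Cand.isValid()) {
      TryCand.Reason = NodeOrder;
      return true;
    }
    // Illustrative extra heuristic: prefer the node with the greater height,
    // i.e. the longer remaining dependence chain.
    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
                   TryCand, Cand, BotHeightReduce))
      return TryCand.Reason != NoCand;
    // Otherwise fall back to the generic post-RA heuristics.
    return PostGenericScheduler::tryCandidate(Cand, TryCand);
  }
};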
llvm::LiveInterval
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
llvm::LiveQueryResult::valueIn
VNInfo * valueIn() const
Return the value that is live-in to the instruction.
Definition: LiveInterval.h:105
llvm::ReadyQueue::push
void push(SUnit *SU)
Definition: MachineScheduler.h:563
llvm::SlotIndex
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_END(MachineScheduler
llvm::VReg2SUnit
An individual mapping from virtual register number to SUnit.
Definition: ScheduleDAGInstrs.h:52
llvm::RegisterPassParser
RegisterPassParser class - Handle the addition of new machine passes.
Definition: MachinePassRegistry.h:135
llvm::SchedBoundary::dumpScheduledState
void dumpScheduledState() const
Definition: MachineScheduler.cpp:2595
llvm::ScheduleDAGMI::initQueues
void initQueues(ArrayRef< SUnit * > TopRoots, ArrayRef< SUnit * > BotRoots)
Release ExitSU predecessors and set up the scheduler queues.
Definition: MachineScheduler.cpp:867
llvm::ArrayRef::rend
reverse_iterator rend() const
Definition: ArrayRef.h:156
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::SchedBoundary::releasePending
void releasePending()
Release pending ready nodes into the available queue.
Definition: MachineScheduler.cpp:2523
llvm::RegPressureTracker::dump
void dump() const
Definition: RegisterPressure.cpp:117
llvm::GenericSchedulerBase::SchedCandidate::SU
SUnit * SU
Definition: MachineScheduler.h:865
llvm::ScheduleDAGMILive::RPTracker
RegPressureTracker RPTracker
Definition: MachineScheduler.h:413
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::GenericScheduler::initPolicy
void initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override
Initialize the per-region scheduling policy.
Definition: MachineScheduler.cpp:2932
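As a sketch of how this per-region hook can be specialized (the class name and the size threshold are hypothetical assumptions), a strategy derived from GenericScheduler might start from the generic policy and then bias very small regions toward bottom-up scheduling:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

class ExampleSchedStrategy : public GenericScheduler {
public:
  ExampleSchedStrategy(const MachineSchedContext *C) : GenericScheduler(C) {}

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override {
    // Let the generic implementation pick the default per-region policy.
    GenericScheduler::initPolicy(Begin, End, NumRegionInstrs);
    // Hypothetical tweak: schedule very small regions bottom-up only.
    if (NumRegionInstrs < 8) {
      RegionPolicy.OnlyBottomUp = true;
      RegionPolicy.OnlyTopDown = false;
    }
  }
};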
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::RegisterOperands::collect
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
Definition: RegisterPressure.cpp:570
llvm::RegisterClassInfo::getNumAllocatableRegs
unsigned getNumAllocatableRegs(const TargetRegisterClass *RC) const
getNumAllocatableRegs - Returns the number of actually allocatable registers in RC in the current fun...
Definition: RegisterClassInfo.h:91
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::DOTGraphTraits
DOTGraphTraits - Template class that can be specialized to customize how graphs are converted to 'dot...
Definition: DOTGraphTraits.h:166
llvm::SUnit::getHeight
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
llvm::SUnit::TopReadyCycle
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
llvm::ScheduleDAGMILive::RegClassInfo
RegisterClassInfo * RegClassInfo
Definition: MachineScheduler.h:392
llvm::initializeMachineSchedulerPass
void initializeMachineSchedulerPass(PassRegistry &)
llvm::SchedBoundary::getZoneCritResIdx
unsigned getZoneCritResIdx() const
Definition: MachineScheduler.h:747
llvm::GenericSchedulerBase::Rem
SchedRemainder Rem
Definition: MachineScheduler.h:912
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::SparseMultiSet::end
iterator end()
Returns an iterator past this container.
Definition: SparseMultiSet.h:320
llvm::ReadyQueue::end
iterator end()
Definition: MachineScheduler.h:557
llvm::tryLess
bool tryLess(int TryVal, int CandVal, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason)
Return true if this heuristic determines order.
Definition: MachineScheduler.cpp:2823
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::RegPressureTracker::getRegSetPressureAtPos
const std::vector< unsigned > & getRegSetPressureAtPos() const
Get the register set pressure at the current position, which may be less than the pressure across the...
Definition: RegisterPressure.h:463
llvm::RegPressureTracker::recedeSkipDebugValues
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
Definition: RegisterPressure.cpp:853
llvm::cl::opt< bool >
llvm::GenericSchedulerBase::SchedCandidate
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
Definition: MachineScheduler.h:861
llvm::GenericSchedulerBase::NoCand
@ NoCand
Definition: MachineScheduler.h:814
llvm::SparseMultiSet::insert
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
Definition: SparseMultiSet.h:420
llvm::ScheduleDAGMI::LIS
LiveIntervals * LIS
Definition: MachineScheduler.h:274
llvm::SUnit::WeakSuccsLeft
unsigned WeakSuccsLeft
Definition: ScheduleDAG.h:271
LiveDebugValues::DbgValue
Class recording the (high level) value of a variable.
Definition: InstrRefBasedImpl.h:229
llvm::LiveRange::getVNInfoBefore
VNInfo * getVNInfoBefore(SlotIndex Idx) const
getVNInfoBefore - Return the VNInfo that is live up to but not necessarily including Idx,...
Definition: LiveInterval.h:429
TargetSchedule.h
llvm::RegPressureTracker::closeBottom
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
Definition: RegisterPressure.cpp:331
llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition: TargetSchedule.h:30
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::TargetSubtargetInfo::enablePostRAMachineScheduler
virtual bool enablePostRAMachineScheduler() const
True if the subtarget should run a machine scheduler after register allocation.
Definition: TargetSubtargetInfo.cpp:52
LiveIntervals.h
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:932
llvm::GenericSchedulerBase::CandPolicy::DemandResIdx
unsigned DemandResIdx
Definition: MachineScheduler.h:826
llvm::createGenericSchedLive
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
Definition: MachineScheduler.cpp:3489
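A sketch of the usual way this factory is reached from a target: the target's TargetPassConfig subclass overrides createMachineScheduler and returns the standard converging scheduler. The pass-config class below is a hypothetical stand-in for a real target's subclass.

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetPassConfig.h"
using namespace llvm;

// Hypothetical pass-config subclass for an imaginary target.
class ExamplePassConfig : public TargetPassConfig {
public:
  ExamplePassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    // Returning nullptr would fall back to the default selection; here the
    // standard converging scheduler is requested explicitly.
    return createGenericSchedLive(C);
  }
};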
llvm::LiveIntervals::getMBBEndIdx
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const
Return the last index in the given basic block.
Definition: LiveIntervals.h:241
llvm::SchedBoundary::SchedModel
const TargetSchedModel * SchedModel
Definition: MachineScheduler.h:619
llvm::RegPressureTracker::reset
void reset()
Definition: RegisterPressure.cpp:243
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::MachineSchedStrategy::registerRoots
virtual void registerRoots()
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.h:244
llvm::MachineSchedStrategy::scheduleTree
virtual void scheduleTree(unsigned SubtreeID)
Scheduler callback to notify that a new subtree is scheduled.
Definition: MachineScheduler.h:252
llvm::SchedBoundary::init
void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem)
Definition: MachineScheduler.cpp:2040
llvm::GenericSchedulerBase::Context
const MachineSchedContext * Context
Definition: MachineScheduler.h:908
llvm::tryPressure
bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, GenericSchedulerBase::CandReason Reason, const TargetRegisterInfo *TRI, const MachineFunction &MF)
Definition: MachineScheduler.cpp:3048
llvm::SchedDFSResult::resize
void resize(unsigned NumSUnits)
Initialize the result data with the size of the DAG.
Definition: ScheduleDFS.h:136
llvm::ScheduleDAGInstrs::IsReachable
bool IsReachable(SUnit *SU, SUnit *TargetSU)
IsReachable - Checks if SU is reachable from TargetSU.
Definition: ScheduleDAGInstrs.h:272
llvm::PostGenericScheduler::registerRoots
void registerRoots() override
Notify this strategy that all roots have been released (including those that depend on EntrySU or Exi...
Definition: MachineScheduler.cpp:3532
llvm::DenseMap
Definition: DenseMap.h:716
llvm::operator<
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:339
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112
llvm::SchedBoundary::getLatencyStallCycles
unsigned getLatencyStallCycles(SUnit *SU)
Get the difference between the given SUnit's ready time and the current cycle.
Definition: MachineScheduler.cpp:2074
ViewMISchedCutoff
static cl::opt< unsigned > ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, cl::desc("Hide nodes with more predecessor/successor than cutoff"))
In some situations a few uninteresting nodes depend on nearly all other nodes in the graph,...
llvm::MachineSchedContext
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
Definition: MachineScheduler.h:125
llvm::RegPressureTracker::closeTop
void closeTop()
Set the boundary for the top of the region and summarize live ins.
Definition: RegisterPressure.cpp:319
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SDep::getReg
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
llvm::SparseMultiSet< VReg2SUnit, VirtReg2IndexFunctor >::iterator
iterator_base< SparseMultiSet * > iterator
Definition: SparseMultiSet.h:312
llvm::RegPressureTracker::advance
void advance()
Advance across the current instruction.
Definition: RegisterPressure.cpp:948
llvm::SUnit::isScheduled
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
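A minimal sketch of the option-definition pattern used throughout this file; the flag name and default value below are hypothetical and exist only to show where cl::init supplies the default.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical hidden flag: cl::init(16) is the value used when the option
// is not given on the command line.
static cl::opt<unsigned> ExampleLimit(
    "example-misched-limit", cl::Hidden,
    cl::desc("Hypothetical limit, for illustration only"), cl::init(16));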
llvm::SchedBoundary::getCurrMOps
unsigned getCurrMOps() const
Micro-ops issued in the current cycle.
Definition: MachineScheduler.h:711
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124
llvm::SchedBoundary::incExecutedResources
void incExecutedResources(unsigned PIdx, unsigned Count)
Definition: MachineScheduler.cpp:2331
llvm::SlotIndex::isSameInstr
static bool isSameInstr(SlotIndex A, SlotIndex B)
isSameInstr - Return true if A and B refer to the same instruction.
Definition: SlotIndexes.h:196
llvm::DOTGraphTraits< ScheduleDAGMI * >::DOTGraphTraits
DOTGraphTraits(bool isSimple=false)
Definition: MachineScheduler.cpp:3875
ArrayRef.h
llvm::LiveRange::isLocal
bool isLocal(SlotIndex Start, SlotIndex End) const
True iff this segment is a single segment that lies between the specified boundaries,...
Definition: LiveInterval.h:518
llvm::LiveRange::Query
LiveQueryResult Query(SlotIndex Idx) const
Query Liveness at Idx.
Definition: LiveInterval.h:541
TargetPassConfig.h
llvm::ScheduleDAGMI::getLIS
LiveIntervals * getLIS() const
Definition: MachineScheduler.h:313
llvm::SchedRemainder
Summarize the unscheduled region.
Definition: MachineScheduler.h:580
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::SUnit::hasPhysRegUses
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
llvm::RegPressureTracker::getPressure
RegisterPressure & getPressure()
Get the resulting register pressure over the traversed region.
Definition: RegisterPressure.h:458
llvm::ScheduleDAGMILive::DFSResult
SchedDFSResult * DFSResult
Information about DAG subtrees.
Definition: MachineScheduler.h:396
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::RegPressureDelta::CriticalMax
PressureChange CriticalMax
Definition: RegisterPressure.h:240
llvm::SUnit::biasCriticalPath
void biasCriticalPath()
Orders this node's predecessor edges such that the critical path edge occurs first.
Definition: ScheduleDAG.cpp:325
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49
ILPMaxRegistry
static MachineSchedRegistry ILPMaxRegistry("ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler)
llvm::RegPressureTracker::getMaxDownwardPressureDelta
void getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction top-down.
Definition: RegisterPressure.cpp:1344
llvm::RegisterPressure::LiveInRegs
SmallVector< RegisterMaskPair, 8 > LiveInRegs
List of live in virtual registers or physical register units.
Definition: RegisterPressure.h:52
llvm::ScheduleDAGMILive::ScheduledTrees
BitVector ScheduledTrees
Definition: MachineScheduler.h:397
llvm::ScheduleDAGMI::dumpSchedule
void dumpSchedule() const
Dump the scheduled Sequence.
Definition: MachineScheduler.cpp:929
llvm::RegisterClassInfo
Definition: RegisterClassInfo.h:29
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::ScheduleDAGInstrs::canAddEdge
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
Definition: ScheduleDAGInstrs.cpp:1191
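A sketch of the typical guard-then-add pattern inside a DAG mutation (the mutation class and the choice of nodes are hypothetical): canAddEdge is queried first so the artificial ordering edge is only added when it cannot introduce a cycle.

#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
using namespace llvm;

struct ExampleMutation : ScheduleDAGMutation {
  void apply(ScheduleDAGInstrs *DAG) override {
    if (DAG->SUnits.size() < 2)
      return;
    // Hypothetical choice of nodes: order the last SUnit after the first one.
    SUnit *First = &DAG->SUnits.front();
    SUnit *Last = &DAG->SUnits.back();
    // Only add the artificial edge if it cannot create a cycle.
    if (DAG->canAddEdge(/*SuccSU=*/Last, /*PredSU=*/First))
      DAG->addEdge(Last, SDep(First, SDep::Artificial));
  }
};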
llvm::ScheduleDAGInstrs::TrackLaneMasks
bool TrackLaneMasks
Whether lane masks should get tracked.
Definition: ScheduleDAGInstrs.h:139
llvm::ScheduleDAGMI::SchedImpl
std::unique_ptr< MachineSchedStrategy > SchedImpl
Definition: MachineScheduler.h:275
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:234
llvm::GenericSchedulerBase::SchedCandidate::Policy
CandPolicy Policy
Definition: MachineScheduler.h:862
llvm::GenericSchedulerBase::SchedResourceDelta::CritResources
unsigned CritResources
Definition: MachineScheduler.h:843
iterator_range.h
llvm::TargetFrameLowering::getStackGrowthDirection
StackDirection getStackGrowthDirection() const
getStackGrowthDirection - Return the direction the stack grows
Definition: TargetFrameLowering.h:89
llvm::ScheduleDAG::dumpNodeAll
void dum