//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
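
// An illustrative sketch (pseudocode, not part of the implementation) of the
// bottom-up list-scheduling loop described above:
//
//   ready = nodes with no unscheduled successors
//   while (!ready.empty()) {
//     SU = ready.popHighestPriority();
//     if (SU can legally issue now)   // no hazard, no live-reg interference
//       schedule SU and release its predecessors into ready;
//     else
//       defer SU to a pending/interference list;
//   }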

#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);

static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
    "disable-sched-cycles", cl::Hidden, cl::init(false),
    cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
    "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
    cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
    "disable-sched-live-uses", cl::Hidden, cl::init(true),
    cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
    "disable-sched-vrcycle", cl::Hidden, cl::init(false),
    cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
    "disable-sched-physreg-join", cl::Hidden, cl::init(false),
    cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
    "disable-sched-stalls", cl::Hidden, cl::init(true),
    cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
    "disable-sched-critical-path", cl::Hidden, cl::init(false),
    cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
    "disable-sched-height", cl::Hidden, cl::init(false),
    cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
    "disable-2addr-hack", cl::Hidden, cl::init(true),
    cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
    "max-sched-reorder", cl::Hidden, cl::init(6),
    cl::desc("Number of instructions to allow ahead of the critical path "
             "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
    "sched-avg-ipc", cl::Hidden, cl::init(1),
    cl::desc("Average inst/cycle when no target itinerary exists."));

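// Illustrative usage (assumed invocation, not from this file): these flags
// take effect when the corresponding scheduler is selected via llc's
// -pre-RA-sched option, e.g.
//
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=6 -disable-sched-height foo.ll
//
// where the -pre-RA-sched values match the RegisterScheduler names above.
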
namespace {

//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit *> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::unique_ptr<SUnit*[]> LiveRegDefs;
  std::unique_ptr<SUnit*[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;

  using LRegsMapT = DenseMap<SUnit*, SmallVector<unsigned, 4>>;

  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
      : ScheduleDAGSDNodes(mf),
        NeedLatency(needlatency), AvailableQueue(availqueue),
        Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Does *NOT* update the topological ordering! It just queues an update.
  void AddPredQueued(SUnit *SU, const SDep &D) {
    Topo.AddPredQueued(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// This returns true if this is a new predecessor.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// This returns true if an edge was removed.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
           AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.MarkDirty();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.MarkDirty();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};

} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
                    << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle =
      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  LLVM_DEBUG(dump());
  Topo.MarkDirty();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}
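
// Illustration (not part of the implementation): in bottom-up mode a node's
// height is its distance in cycles from the end of the block. If SU was
// scheduled at height 5 and the PredSU->SU edge has latency 2, the call above
// raises PredSU's height to at least 7, the earliest cycle at which PredSU
// can issue without stalling SU.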

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  while (true) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  while (true) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}
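
// Illustration (not part of the implementation): climbing the chain upward
// from a CALLSEQ_END, every CALLSEQ_END seen increments NestLevel and every
// CALLSEQ_BEGIN decrements it, so nested pairs cancel out:
//
//   CALLSEQ_BEGIN (outer)   NestLevel 1 -> 0, this node is returned
//     CALLSEQ_BEGIN (inner) NestLevel 2 -> 1
//     CALLSEQ_END   (inner) NestLevel 1 -> 2
//   CALLSEQ_END   (outer)   search starts here, NestLevel 0 -> 1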

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors.
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}
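
// Illustration (not part of the implementation): CallResource is a pseudo
// register slot at index TRI->getNumRegs(), just past the real registers.
// While a call sequence is live, LiveRegDefs[CallResource] points at the
// CALLSEQ_BEGIN SUnit and LiveRegGens[CallResource] at the CALLSEQ_END SUnit,
// so a second call interferes exactly like a clobbered physical register and
// cannot be interleaved with the first.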

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
                      << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
         "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// state and that of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGens only if it was empty before this unscheduling.
      // This avoids incorrectly overwriting an entry set by a previous run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle by
/// unscheduling nodes back to (and including) BtSU, in order to make it
/// possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// TryUnfoldSU - Attempt to unfold a load folded into SU (e.g. a load+op
/// machine instruction) into a separate load SUnit and a new SUnit for the
/// remaining operation. Returns the new SUnit on success, SU itself if
/// unfolding would require cloning an already-scheduled node, or nullptr if
/// the node cannot be unfolded.
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // Unfolding an x86 DEC64m operation results in store, dec, load, which
  // can't be handled here, so quit.
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location and producing the same type of value
  // but it has different alignment or volatility.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit of unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  bool isNewN = true;
  SUnit *NewSU;
  // This can only happen when isNewLoad is false.
  if (N->getNodeId() != -1) {
    NewSU = &SUnits[N->getNodeId()];
    // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit of unfolding so just return SU.
    if (NewSU->isScheduled) {
      return SU;
    }
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);
  }

  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPredQueued(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPredQueued(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPredQueued(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPredQueued(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  if (isNewN)
    AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
  LLVM_DEBUG(dumpNode(*SU));

  if (N->getGluedNode() &&
      !TII->canCopyGluedNodeDuringSchedule(N)) {
    LLVM_DEBUG(
        dbgs()
        << "Giving up because it has incoming glue and the target does not "
           "want to copy it\n");
    return nullptr;
  }

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue) {
      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
      return nullptr;
    } else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
      LLVM_DEBUG(
          dbgs() << "Giving up because one of the operands is glue and "
                    "the target does not want to copy it\n");
      return nullptr;
    }
  }

  // If possible unfold instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled don't bother duplicating and just return.
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPredQueued(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPredQueued(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPredQueued(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPredQueued(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPredQueued(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so value 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}
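
// Illustration (hypothetical example, not from the source): for an x86 node
// whose MCInstrDesc has one explicit def (value 0) and implicit defs
// {EAX, EFLAGS}, a query for Reg == EFLAGS starts NumRes at getNumDefs() == 1,
// skips past EAX, and returns the node's value type at index 2.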

/// CheckForLiveRegDef - Update the set of interfering live registers, LRegs,
/// if the specified register def of the specified SUnit clobbers any "live"
/// registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM ||
        Node->getOpcode() == ISD::INLINEASM_BR) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.OpInfo[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}
1454 
1455 /// Return a node that can be scheduled in this cycle. Requirements:
1456 /// (1) Ready: latency has been satisfied
1457 /// (2) No Hazards: resources are available
1458 /// (3) No Interferences: may unschedule to break register interferences.
1459 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1460  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1461  auto FindAvailableNode = [&]() {
1462  while (CurSU) {
1464  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1465  break;
1466  LLVM_DEBUG(dbgs() << " Interfering reg ";
1467  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1468  else dbgs() << printReg(LRegs[0], TRI);
1469  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1470  std::pair<LRegsMapT::iterator, bool> LRegsPair =
1471  LRegsMap.insert(std::make_pair(CurSU, LRegs));
1472  if (LRegsPair.second) {
1473  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1474  Interferences.push_back(CurSU);
1475  }
1476  else {
1477  assert(CurSU->isPending && "Interferences are pending");
1478  // Update the interference with current live regs.
1479  LRegsPair.first->second = LRegs;
1480  }
1481  CurSU = AvailableQueue->pop();
1482  }
1483  };
1484  FindAvailableNode();
1485  if (CurSU)
1486  return CurSU;
1487 
1488  // We query the topological order in the loop body, so make sure outstanding
1489  // updates are applied before entering it (we only enter the loop if there
1490  // are some interferences). If we make changes to the ordering, we exit
1491  // the loop.
1492 
1493  // All candidates are delayed due to live physical reg dependencies.
1494  // Try backtracking, code duplication, or inserting cross class copies
1495  // to resolve it.
1496  for (SUnit *TrySU : Interferences) {
1497  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1498 
1499  // Try unscheduling up to the point where it's safe to schedule
1500  // this node.
1501  SUnit *BtSU = nullptr;
1502  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1503  for (unsigned Reg : LRegs) {
1504  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1505  BtSU = LiveRegGens[Reg];
1506  LiveCycle = BtSU->getHeight();
1507  }
1508  }
1509  if (!WillCreateCycle(TrySU, BtSU)) {
1510  // BacktrackBottomUp mutates Interferences!
1511  BacktrackBottomUp(TrySU, BtSU);
1512 
1513  // Force the current node to be scheduled before the node that
1514  // requires the physical reg dep.
1515  if (BtSU->isAvailable) {
1516  BtSU->isAvailable = false;
1517  if (!BtSU->isPending)
1518  AvailableQueue->remove(BtSU);
1519  }
1520  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1521  << ") to SU(" << TrySU->NodeNum << ")\n");
1522  AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1523 
1524  // If one or more successors has been unscheduled, then the current
1525  // node is no longer available.
1526  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1527  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1528  CurSU = AvailableQueue->pop();
1529  } else {
1530  LLVM_DEBUG(dbgs() << "TrySU available\n");
1531  // Available and in AvailableQueue
1532  AvailableQueue->remove(TrySU);
1533  CurSU = TrySU;
1534  }
1535  FindAvailableNode();
1536  // Interferences has been mutated. We must break.
1537  break;
1538  }
1539  }
1540 
1541  if (!CurSU) {
1542  // Can't backtrack. If it's too expensive to copy the value, then try
1543  // duplicate the nodes that produces these "too expensive to copy"
1544  // values to break the dependency. In case even that doesn't work,
1545  // insert cross class copies.
1546  // If it's not too expensive, i.e. cost != -1, issue copies.
1547  SUnit *TrySU = Interferences[0];
1548  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1549  assert(LRegs.size() == 1 && "Can't handle this yet!");
1550  unsigned Reg = LRegs[0];
1551  SUnit *LRDef = LiveRegDefs[Reg];
1552  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1553  const TargetRegisterClass *RC =
1554  TRI->getMinimalPhysRegClass(Reg, VT);
1555  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1556 
1557  // If cross copy register class is the same as RC, then it must be possible
1558  // copy the value directly. Do not try duplicate the def.
1559  // If cross copy register class is not the same as RC, then it's possible to
1560  // copy the value but it require cross register class copies and it is
1561  // expensive.
1562  // If cross copy register class is null, then it's not possible to copy
1563  // the value at all.
1564  SUnit *NewDef = nullptr;
1565  if (DestRC != RC) {
1566  NewDef = CopyAndMoveSuccessors(LRDef);
1567  if (!DestRC && !NewDef)
1568  report_fatal_error("Can't handle live physical register dependency!");
1569  }
1570  if (!NewDef) {
1571  // Issue copies, these can be expensive cross register class copies.
1573  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1574  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1575  << " to SU #" << Copies.front()->NodeNum << "\n");
1576  AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1577  NewDef = Copies.back();
1578  }
1579 
1580  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1581  << " to SU #" << TrySU->NodeNum << "\n");
1582  LiveRegDefs[Reg] = NewDef;
1583  AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1584  TrySU->isAvailable = false;
1585  CurSU = NewDef;
1586  }
1587  assert(CurSU && "Unable to resolve live physical register dependencies!");
1588  return CurSU;
1589 }
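// Editor's note (sketch, not part of the original source): the cross-copy
// handling above reduces to three cases keyed off
// TRI->getCrossCopyRegClass(RC). This condensed restatement mirrors the
// control flow of the code above.
#if 0
if (DestRC == RC) {
  // Direct copy is possible; issue copies, never duplicate the def.
} else if (DestRC) {
  // Copying requires expensive cross-register-class copies; prefer
  // duplicating the def (CopyAndMoveSuccessors), falling back to copies.
} else {
  // DestRC is null: the value cannot be copied at all, so duplication
  // must succeed or report_fatal_error is reached.
}
#endif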
1590 
1591 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1592 /// schedulers.
1593 void ScheduleDAGRRList::ListScheduleBottomUp() {
1594  // Release any predecessors of the special Exit node.
1595  ReleasePredecessors(&ExitSU);
1596 
1597  // Add root to Available queue.
1598  if (!SUnits.empty()) {
1599  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1600  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1601  RootSU->isAvailable = true;
1602  AvailableQueue->push(RootSU);
1603  }
1604 
1605  // While the Available queue is not empty, grab the node with the highest
1606  // priority. If it is not ready, put it back. Schedule the node.
1607  Sequence.reserve(SUnits.size());
1608  while (!AvailableQueue->empty() || !Interferences.empty()) {
1609  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1610  AvailableQueue->dump(this));
1611 
1612  // Pick the best node to schedule taking all constraints into
1613  // consideration.
1614  SUnit *SU = PickNodeToScheduleBottomUp();
1615 
1616  AdvancePastStalls(SU);
1617 
1618  ScheduleNodeBottomUp(SU);
1619 
1620  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1621  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1622  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1623  "MinAvailableCycle uninitialized");
1624  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1625  }
1626  }
1627 
1628  // Reverse the order since the schedule was built bottom-up.
1629  std::reverse(Sequence.begin(), Sequence.end());
1630 
1631 #ifndef NDEBUG
1632  VerifyScheduledSequence(/*isBottomUp=*/true);
1633 #endif
1634 }
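// Editor's sketch (illustration only, not part of the original file): the
// skeleton of the bottom-up main loop above, with the bookkeeping elided.
// All names match members used in ListScheduleBottomUp.
#if 0
while (!AvailableQueue->empty() || !Interferences.empty()) {
  SUnit *SU = PickNodeToScheduleBottomUp(); // may backtrack / insert copies
  AdvancePastStalls(SU);                    // honor latency & hazard stalls
  ScheduleNodeBottomUp(SU);                 // emit SU, release predecessors
  while (AvailableQueue->empty() && !PendingQueue.empty())
    AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
std::reverse(Sequence.begin(), Sequence.end()); // bottom-up => reverse
#endif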
1635 
1636 namespace {
1637 
1638 class RegReductionPQBase;
1639 
1640 struct queue_sort {
1641  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1642 };
1643 
1644 #ifndef NDEBUG
1645 template<class SF>
1646 struct reverse_sort : public queue_sort {
1647  SF &SortFunc;
1648 
1649  reverse_sort(SF &sf) : SortFunc(sf) {}
1650 
1651  bool operator()(SUnit* left, SUnit* right) const {
1652  // reverse left/right rather than simply !SortFunc(left, right)
1653  // to expose different paths in the comparison logic.
1654  return SortFunc(right, left);
1655  }
1656 };
1657 #endif // NDEBUG
1658 
1659 /// bu_ls_rr_sort - Priority function for the bottom-up register pressure
1660 /// reduction scheduler.
1661 struct bu_ls_rr_sort : public queue_sort {
1662  enum {
1663  IsBottomUp = true,
1664  HasReadyFilter = false
1665  };
1666 
1667  RegReductionPQBase *SPQ;
1668 
1669  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1670 
1671  bool operator()(SUnit* left, SUnit* right) const;
1672 };
1673 
1674 // src_ls_rr_sort - Priority function for source order scheduler.
1675 struct src_ls_rr_sort : public queue_sort {
1676  enum {
1677  IsBottomUp = true,
1678  HasReadyFilter = false
1679  };
1680 
1681  RegReductionPQBase *SPQ;
1682 
1683  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1684 
1685  bool operator()(SUnit* left, SUnit* right) const;
1686 };
1687 
1688 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1689 struct hybrid_ls_rr_sort : public queue_sort {
1690  enum {
1691  IsBottomUp = true,
1692  HasReadyFilter = false
1693  };
1694 
1695  RegReductionPQBase *SPQ;
1696 
1697  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1698 
1699  bool isReady(SUnit *SU, unsigned CurCycle) const;
1700 
1701  bool operator()(SUnit* left, SUnit* right) const;
1702 };
1703 
1704 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1705 // scheduler.
1706 struct ilp_ls_rr_sort : public queue_sort {
1707  enum {
1708  IsBottomUp = true,
1709  HasReadyFilter = false
1710  };
1711 
1712  RegReductionPQBase *SPQ;
1713 
1714  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1715 
1716  bool isReady(SUnit *SU, unsigned CurCycle) const;
1717 
1718  bool operator()(SUnit* left, SUnit* right) const;
1719 };
1720 
1721 class RegReductionPQBase : public SchedulingPriorityQueue {
1722 protected:
1723  std::vector<SUnit *> Queue;
1724  unsigned CurQueueId = 0;
1725  bool TracksRegPressure;
1726  bool SrcOrder;
1727 
1728  // SUnits - The SUnits for the current graph.
1729  std::vector<SUnit> *SUnits;
1730 
1731  MachineFunction &MF;
1732  const TargetInstrInfo *TII;
1733  const TargetRegisterInfo *TRI;
1734  const TargetLowering *TLI;
1735  ScheduleDAGRRList *scheduleDAG = nullptr;
1736 
1737  // SethiUllmanNumbers - The SethiUllman number for each node.
1738  std::vector<unsigned> SethiUllmanNumbers;
1739 
1740  /// RegPressure - Tracking current reg pressure per register class.
1741  std::vector<unsigned> RegPressure;
1742 
1743  /// RegLimit - Tracking the number of allocatable registers per register
1744  /// class.
1745  std::vector<unsigned> RegLimit;
1746 
1747 public:
1748  RegReductionPQBase(MachineFunction &mf,
1749  bool hasReadyFilter,
1750  bool tracksrp,
1751  bool srcorder,
1752  const TargetInstrInfo *tii,
1753  const TargetRegisterInfo *tri,
1754  const TargetLowering *tli)
1755  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1756  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1757  if (TracksRegPressure) {
1758  unsigned NumRC = TRI->getNumRegClasses();
1759  RegLimit.resize(NumRC);
1760  RegPressure.resize(NumRC);
1761  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1762  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1763  for (const TargetRegisterClass *RC : TRI->regclasses())
1764  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1765  }
1766  }
1767 
1768  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1769  scheduleDAG = scheduleDag;
1770  }
1771 
1772  ScheduleHazardRecognizer* getHazardRec() {
1773  return scheduleDAG->getHazardRec();
1774  }
1775 
1776  void initNodes(std::vector<SUnit> &sunits) override;
1777 
1778  void addNode(const SUnit *SU) override;
1779 
1780  void updateNode(const SUnit *SU) override;
1781 
1782  void releaseState() override {
1783  SUnits = nullptr;
1784  SethiUllmanNumbers.clear();
1785  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1786  }
1787 
1788  unsigned getNodePriority(const SUnit *SU) const;
1789 
1790  unsigned getNodeOrdering(const SUnit *SU) const {
1791  if (!SU->getNode()) return 0;
1792 
1793  return SU->getNode()->getIROrder();
1794  }
1795 
1796  bool empty() const override { return Queue.empty(); }
1797 
1798  void push(SUnit *U) override {
1799  assert(!U->NodeQueueId && "Node in the queue already");
1800  U->NodeQueueId = ++CurQueueId;
1801  Queue.push_back(U);
1802  }
1803 
1804  void remove(SUnit *SU) override {
1805  assert(!Queue.empty() && "Queue is empty!");
1806  assert(SU->NodeQueueId != 0 && "Not in queue!");
1807  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1808  if (I != std::prev(Queue.end()))
1809  std::swap(*I, Queue.back());
1810  Queue.pop_back();
1811  SU->NodeQueueId = 0;
1812  }
1813 
1814  bool tracksRegPressure() const override { return TracksRegPressure; }
1815 
1816  void dumpRegPressure() const;
1817 
1818  bool HighRegPressure(const SUnit *SU) const;
1819 
1820  bool MayReduceRegPressure(SUnit *SU) const;
1821 
1822  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1823 
1824  void scheduledNode(SUnit *SU) override;
1825 
1826  void unscheduledNode(SUnit *SU) override;
1827 
1828 protected:
1829  bool canClobber(const SUnit *SU, const SUnit *Op);
1830  void AddPseudoTwoAddrDeps();
1831  void PrescheduleNodesWithMultipleUses();
1832  void CalculateSethiUllmanNumbers();
1833 };
1834 
1835 template<class SF>
1836 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1837  std::vector<SUnit *>::iterator Best = Q.begin();
1838  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
1839  if (Picker(*Best, *I))
1840  Best = I;
1841  SUnit *V = *Best;
1842  if (Best != std::prev(Q.end()))
1843  std::swap(*Best, Q.back());
1844  Q.pop_back();
1845  return V;
1846 }
1847 
1848 template<class SF>
1849 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1850 #ifndef NDEBUG
1851  if (DAG->StressSched) {
1852  reverse_sort<SF> RPicker(Picker);
1853  return popFromQueueImpl(Q, RPicker);
1854  }
1855 #endif
1856  (void)DAG;
1857  return popFromQueueImpl(Q, Picker);
1858 }
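// Editor's sketch: popFromQueueImpl accepts any callable picker, where
// Picker(Best, Cand) returning true means Cand displaces Best. This
// hypothetical helper (not used anywhere in this file) would pop the SUnit
// with the smallest NodeNum.
#if 0
static SUnit *popLowestNodeNum(std::vector<SUnit *> &Q) {
  auto Picker = [](SUnit *Best, SUnit *Cand) {
    return Cand->NodeNum < Best->NodeNum;
  };
  return popFromQueueImpl(Q, Picker);
}
#endif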
1859 
1860 //===----------------------------------------------------------------------===//
1861 // RegReductionPriorityQueue Definition
1862 //===----------------------------------------------------------------------===//
1863 //
1864 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1865 // to reduce register pressure.
1866 //
1867 template<class SF>
1868 class RegReductionPriorityQueue : public RegReductionPQBase {
1869  SF Picker;
1870 
1871 public:
1872  RegReductionPriorityQueue(MachineFunction &mf,
1873  bool tracksrp,
1874  bool srcorder,
1875  const TargetInstrInfo *tii,
1876  const TargetRegisterInfo *tri,
1877  const TargetLowering *tli)
1878  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1879  tii, tri, tli),
1880  Picker(this) {}
1881 
1882  bool isBottomUp() const override { return SF::IsBottomUp; }
1883 
1884  bool isReady(SUnit *U) const override {
1885  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1886  }
1887 
1888  SUnit *pop() override {
1889  if (Queue.empty()) return nullptr;
1890 
1891  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1892  V->NodeQueueId = 0;
1893  return V;
1894  }
1895 
1896 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1897  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1898  // Emulate pop() without clobbering NodeQueueIds.
1899  std::vector<SUnit *> DumpQueue = Queue;
1900  SF DumpPicker = Picker;
1901  while (!DumpQueue.empty()) {
1902  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1903  dbgs() << "Height " << SU->getHeight() << ": ";
1904  DAG->dumpNode(*SU);
1905  }
1906  }
1907 #endif
1908 };
1909 
1910 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1911 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1912 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1913 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1914 
1915 } // end anonymous namespace
1916 
1917 //===----------------------------------------------------------------------===//
1918 // Static Node Priority for Register Pressure Reduction
1919 //===----------------------------------------------------------------------===//
1920 
1921 // Check for special nodes that bypass scheduling heuristics.
1922 // Currently this pushes TokenFactor nodes down, but may be used for other
1923 // pseudo-ops as well.
1924 //
1925 // Return -1 to schedule right above left, 1 for left above right.
1926 // Return 0 if no bias exists.
1927 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1928  bool LSchedLow = left->isScheduleLow;
1929  bool RSchedLow = right->isScheduleLow;
1930  if (LSchedLow != RSchedLow)
1931  return LSchedLow < RSchedLow ? 1 : -1;
1932  return 0;
1933 }
1934 
1935 /// CalcNodeSethiUllmanNumber - Compute the Sethi-Ullman number.
1936 /// A smaller number means a higher priority.
1937 static unsigned
1938 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1939  if (SUNumbers[SU->NodeNum] != 0)
1940  return SUNumbers[SU->NodeNum];
1941 
1942  // Use WorkList to avoid stack overflow on excessively large IRs.
1943  struct WorkState {
1944  WorkState(const SUnit *SU) : SU(SU) {}
1945  const SUnit *SU;
1946  unsigned PredsProcessed = 0;
1947  };
1948 
1949  SmallVector<WorkState, 16> WorkList;
1950  WorkList.push_back(SU);
1951  while (!WorkList.empty()) {
1952  auto &Temp = WorkList.back();
1953  auto *TempSU = Temp.SU;
1954  bool AllPredsKnown = true;
1955  // Try to find a non-evaluated pred and push it into the processing stack.
1956  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1957  auto &Pred = TempSU->Preds[P];
1958  if (Pred.isCtrl()) continue; // ignore chain preds
1959  SUnit *PredSU = Pred.getSUnit();
1960  if (SUNumbers[PredSU->NodeNum] == 0) {
1961 #ifndef NDEBUG
1962  // In debug mode, check that we don't already have such an element on the stack.
1963  for (auto It : WorkList)
1964  assert(It.SU != PredSU && "Trying to push an element twice?");
1965 #endif
1966  // Next time start processing this one starting from the next pred.
1967  Temp.PredsProcessed = P + 1;
1968  WorkList.push_back(PredSU);
1969  AllPredsKnown = false;
1970  break;
1971  }
1972  }
1973 
1974  if (!AllPredsKnown)
1975  continue;
1976 
1977  // Once all preds are known, we can calculate the answer for this one.
1978  unsigned SethiUllmanNumber = 0;
1979  unsigned Extra = 0;
1980  for (const SDep &Pred : TempSU->Preds) {
1981  if (Pred.isCtrl()) continue; // ignore chain preds
1982  SUnit *PredSU = Pred.getSUnit();
1983  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
1984  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
1985  if (PredSethiUllman > SethiUllmanNumber) {
1986  SethiUllmanNumber = PredSethiUllman;
1987  Extra = 0;
1988  } else if (PredSethiUllman == SethiUllmanNumber)
1989  ++Extra;
1990  }
1991 
1992  SethiUllmanNumber += Extra;
1993  if (SethiUllmanNumber == 0)
1994  SethiUllmanNumber = 1;
1995  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
1996  WorkList.pop_back();
1997  }
1998 
1999  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2000  return SUNumbers[SU->NodeNum];
2001 }
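// Editor's worked example: for a node c whose two non-chain operands a and b
// are leaves, the loop above first assigns SethiUllman(a) = SethiUllman(b)
// = 1 (a leaf's number computes to 0, which is bumped to 1). For c, the
// maximum over its preds is 1 and one further pred ties that maximum, so
// Extra = 1 and SethiUllman(c) = 1 + 1 = 2, matching the classical
// Sethi-Ullman labeling.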
2002 
2003 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2004 /// scheduling units.
2005 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2006  SethiUllmanNumbers.assign(SUnits->size(), 0);
2007 
2008  for (const SUnit &SU : *SUnits)
2009  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2010 }
2011 
2012 void RegReductionPQBase::addNode(const SUnit *SU) {
2013  unsigned SUSize = SethiUllmanNumbers.size();
2014  if (SUnits->size() > SUSize)
2015  SethiUllmanNumbers.resize(SUSize*2, 0);
2016  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2017 }
2018 
2019 void RegReductionPQBase::updateNode(const SUnit *SU) {
2020  SethiUllmanNumbers[SU->NodeNum] = 0;
2021  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2022 }
2023 
2024 // Lower priority means schedule further down. For bottom-up scheduling, lower
2025 // priority SUs are scheduled before higher priority SUs.
2026 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2027  assert(SU->NodeNum < SethiUllmanNumbers.size());
2028  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2029  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2030  // CopyToReg should be close to its uses to facilitate coalescing and
2031  // avoid spilling.
2032  return 0;
2033  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2034  Opc == TargetOpcode::SUBREG_TO_REG ||
2035  Opc == TargetOpcode::INSERT_SUBREG)
2036  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2037  // close to their uses to facilitate coalescing.
2038  return 0;
2039  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2040  // If SU does not have a register use, i.e. it doesn't produce a value
2041  // that would be consumed (e.g. store), then it terminates a chain of
2042  // computation. Give it a large SethiUllman number so it will be
2043  // scheduled right before its predecessors and doesn't lengthen
2044  // their live ranges.
2045  return 0xffff;
2046  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2047  // If SU does not have a register def, schedule it close to its uses
2048  // because it does not lengthen any live ranges.
2049  return 0;
2050 #if 1
2051  return SethiUllmanNumbers[SU->NodeNum];
2052 #else
2053  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2054  if (SU->isCallOp) {
2055  // FIXME: This assumes all of the defs are used as call operands.
2056  int NP = (int)Priority - SU->getNode()->getNumValues();
2057  return (NP > 0) ? NP : 0;
2058  }
2059  return Priority;
2060 #endif
2061 }
2062 
2063 //===----------------------------------------------------------------------===//
2064 // Register Pressure Tracking
2065 //===----------------------------------------------------------------------===//
2066 
2067 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2068 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2069  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2070  unsigned Id = RC->getID();
2071  unsigned RP = RegPressure[Id];
2072  if (!RP) continue;
2073  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2074  << RegLimit[Id] << '\n');
2075  }
2076 }
2077 #endif
2078 
2079 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2080  if (!TLI)
2081  return false;
2082 
2083  for (const SDep &Pred : SU->Preds) {
2084  if (Pred.isCtrl())
2085  continue;
2086  SUnit *PredSU = Pred.getSUnit();
2087  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2088  // to cover the number of registers defined (they are all live).
2089  if (PredSU->NumRegDefsLeft == 0) {
2090  continue;
2091  }
2092  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2093  RegDefPos.IsValid(); RegDefPos.Advance()) {
2094  unsigned RCId, Cost;
2095  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2096 
2097  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2098  return true;
2099  }
2100  }
2101  return false;
2102 }
2103 
2104 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2105  const SDNode *N = SU->getNode();
2106 
2107  if (!N->isMachineOpcode() || !SU->NumSuccs)
2108  return false;
2109 
2110  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2111  for (unsigned i = 0; i != NumDefs; ++i) {
2112  MVT VT = N->getSimpleValueType(i);
2113  if (!N->hasAnyUseOfValue(i))
2114  continue;
2115  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2116  if (RegPressure[RCId] >= RegLimit[RCId])
2117  return true;
2118  }
2119  return false;
2120 }
2121 
2122 // Compute the register pressure contribution of this instruction by counting
2123 // up for uses that are not live and down for defs. Only count register classes
2124 // that are already under high pressure. As a side effect, compute the number of
2125 // uses of registers that are already live.
2126 //
2127 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2128 // so could probably be factored.
2129 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2130  LiveUses = 0;
2131  int PDiff = 0;
2132  for (const SDep &Pred : SU->Preds) {
2133  if (Pred.isCtrl())
2134  continue;
2135  SUnit *PredSU = Pred.getSUnit();
2136  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2137  // to cover the number of registers defined (they are all live).
2138  if (PredSU->NumRegDefsLeft == 0) {
2139  if (PredSU->getNode()->isMachineOpcode())
2140  ++LiveUses;
2141  continue;
2142  }
2143  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2144  RegDefPos.IsValid(); RegDefPos.Advance()) {
2145  MVT VT = RegDefPos.GetValue();
2146  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2147  if (RegPressure[RCId] >= RegLimit[RCId])
2148  ++PDiff;
2149  }
2150  }
2151  const SDNode *N = SU->getNode();
2152 
2153  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2154  return PDiff;
2155 
2156  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2157  for (unsigned i = 0; i != NumDefs; ++i) {
2158  MVT VT = N->getSimpleValueType(i);
2159  if (!N->hasAnyUseOfValue(i))
2160  continue;
2161  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2162  if (RegPressure[RCId] >= RegLimit[RCId])
2163  --PDiff;
2164  }
2165  return PDiff;
2166 }
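// Editor's note (interpretation, not in the original source): PDiff models
// the pressure change of scheduling SU bottom-up, restricted to classes
// already at their limit: each operand def that would become live counts +1,
// each of SU's own defs that would end counts -1. For example, a node whose
// operands add two live values in a pressured class while ending one of its
// own defs there has PDiff = 2 - 1 = +1.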
2167 
2168 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2169  if (!TracksRegPressure)
2170  return;
2171 
2172  if (!SU->getNode())
2173  return;
2174 
2175  for (const SDep &Pred : SU->Preds) {
2176  if (Pred.isCtrl())
2177  continue;
2178  SUnit *PredSU = Pred.getSUnit();
2179  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2180  // to cover the number of registers defined (they are all live).
2181  if (PredSU->NumRegDefsLeft == 0) {
2182  continue;
2183  }
2184  // FIXME: The ScheduleDAG currently loses information about which of a
2185  // node's values is consumed by each dependence. Consequently, if the node
2186  // defines multiple register classes, we don't know which to pressurize
2187  // here. Instead the following loop consumes the register defs in an
2188  // arbitrary order. At least it handles the common case of clustered loads
2189  // to the same class. For precise liveness, each SDep needs to indicate the
2190  // result number. But that tightly couples the ScheduleDAG with the
2191  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2192  // value type or register class to SDep.
2193  //
2194  // The most important aspect of register tracking is balancing the increase
2195  // here with the reduction further below. Note that this SU may use multiple
2196  // defs in PredSU. They can't be determined here, but we've already
2197  // compensated by reducing NumRegDefsLeft in PredSU during
2198  // ScheduleDAGSDNodes::AddSchedEdges.
2199  --PredSU->NumRegDefsLeft;
2200  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2201  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2202  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2203  if (SkipRegDefs)
2204  continue;
2205 
2206  unsigned RCId, Cost;
2207  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2208  RegPressure[RCId] += Cost;
2209  break;
2210  }
2211  }
2212 
2213  // We should have this assert, but there may be dead SDNodes that never
2214  // materialize as SUnits, so they don't appear to generate liveness.
2215  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2216  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2217  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2218  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2219  if (SkipRegDefs > 0)
2220  continue;
2221  unsigned RCId, Cost;
2222  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2223  if (RegPressure[RCId] < Cost) {
2224  // Register pressure tracking is imprecise. This can happen. But we try
2225  // hard not to let it happen because it likely results in poor scheduling.
2226  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2227  << ") has too many regdefs\n");
2228  RegPressure[RCId] = 0;
2229  }
2230  else {
2231  RegPressure[RCId] -= Cost;
2232  }
2233  }
2234  LLVM_DEBUG(dumpRegPressure());
2235 }
2236 
2237 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2238  if (!TracksRegPressure)
2239  return;
2240 
2241  const SDNode *N = SU->getNode();
2242  if (!N) return;
2243 
2244  if (!N->isMachineOpcode()) {
2245  if (N->getOpcode() != ISD::CopyToReg)
2246  return;
2247  } else {
2248  unsigned Opc = N->getMachineOpcode();
2249  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2250  Opc == TargetOpcode::INSERT_SUBREG ||
2251  Opc == TargetOpcode::SUBREG_TO_REG ||
2252  Opc == TargetOpcode::REG_SEQUENCE ||
2253  Opc == TargetOpcode::IMPLICIT_DEF)
2254  return;
2255  }
2256 
2257  for (const SDep &Pred : SU->Preds) {
2258  if (Pred.isCtrl())
2259  continue;
2260  SUnit *PredSU = Pred.getSUnit();
2261  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2262  // counts data deps.
2263  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2264  continue;
2265  const SDNode *PN = PredSU->getNode();
2266  if (!PN->isMachineOpcode()) {
2267  if (PN->getOpcode() == ISD::CopyFromReg) {
2268  MVT VT = PN->getSimpleValueType(0);
2269  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2270  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2271  }
2272  continue;
2273  }
2274  unsigned POpc = PN->getMachineOpcode();
2275  if (POpc == TargetOpcode::IMPLICIT_DEF)
2276  continue;
2277  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2278  POpc == TargetOpcode::INSERT_SUBREG ||
2279  POpc == TargetOpcode::SUBREG_TO_REG) {
2280  MVT VT = PN->getSimpleValueType(0);
2281  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2282  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2283  continue;
2284  }
2285  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2286  for (unsigned i = 0; i != NumDefs; ++i) {
2287  MVT VT = PN->getSimpleValueType(i);
2288  if (!PN->hasAnyUseOfValue(i))
2289  continue;
2290  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2291  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2292  // Register pressure tracking is imprecise. This can happen.
2293  RegPressure[RCId] = 0;
2294  else
2295  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2296  }
2297  }
2298 
2299  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2300  // may transfer data dependencies to CopyToReg.
2301  if (SU->NumSuccs && N->isMachineOpcode()) {
2302  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2303  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2304  MVT VT = N->getSimpleValueType(i);
2305  if (VT == MVT::Glue || VT == MVT::Other)
2306  continue;
2307  if (!N->hasAnyUseOfValue(i))
2308  continue;
2309  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2310  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2311  }
2312  }
2313 
2314  LLVM_DEBUG(dumpRegPressure());
2315 }
2316 
2317 //===----------------------------------------------------------------------===//
2318 // Dynamic Node Priority for Register Pressure Reduction
2319 //===----------------------------------------------------------------------===//
2320 
2321 /// closestSucc - Returns the scheduled cycle of the successor which is
2322 /// closest to the current cycle.
2323 static unsigned closestSucc(const SUnit *SU) {
2324  unsigned MaxHeight = 0;
2325  for (const SDep &Succ : SU->Succs) {
2326  if (Succ.isCtrl()) continue; // ignore chain succs
2327  unsigned Height = Succ.getSUnit()->getHeight();
2328  // If there are a bunch of CopyToRegs stacked up, they should be considered
2329  // to be at the same position.
2330  if (Succ.getSUnit()->getNode() &&
2331  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2332  Height = closestSucc(Succ.getSUnit())+1;
2333  if (Height > MaxHeight)
2334  MaxHeight = Height;
2335  }
2336  return MaxHeight;
2337 }
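// Editor's worked example: if SU has one ordinary use at height 3 and also
// feeds a CopyToReg whose own closest successor is at height 5, the
// CopyToReg is treated as sitting at height 5 + 1 = 6, so closestSucc(SU)
// returns max(3, 6) = 6.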
2338 
2339 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2340 /// for scratch registers, i.e. number of data dependencies.
2341 static unsigned calcMaxScratches(const SUnit *SU) {
2342  unsigned Scratches = 0;
2343  for (const SDep &Pred : SU->Preds) {
2344  if (Pred.isCtrl()) continue; // ignore chain preds
2345  Scratches++;
2346  }
2347  return Scratches;
2348 }
2349 
2350 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2351 /// CopyFromReg from a virtual register.
2352 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2353  bool RetVal = false;
2354  for (const SDep &Pred : SU->Preds) {
2355  if (Pred.isCtrl()) continue;
2356  const SUnit *PredSU = Pred.getSUnit();
2357  if (PredSU->getNode() &&
2358  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2359  unsigned Reg =
2360  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2361  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2362  RetVal = true;
2363  continue;
2364  }
2365  }
2366  return false;
2367  }
2368  return RetVal;
2369 }
2370 
2371 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2372 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2373 /// it has no other use. It should be scheduled closer to the terminator.
2374 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2375  bool RetVal = false;
2376  for (const SDep &Succ : SU->Succs) {
2377  if (Succ.isCtrl()) continue;
2378  const SUnit *SuccSU = Succ.getSUnit();
2379  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2380  unsigned Reg =
2381  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2382  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2383  RetVal = true;
2384  continue;
2385  }
2386  }
2387  return false;
2388  }
2389  return RetVal;
2390 }
2391 
2392 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2393 // set isVRegCycle for its CopyFromReg operands.
2394 //
2395 // This is only relevant for single-block loops, in which case the VRegCycle
2396 // node is likely an induction variable in which the operand and target virtual
2397 // registers should be coalesced (e.g. pre/post increment values). Setting the
2398 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2399 // CopyFromReg so that this node becomes the virtual register "kill". This
2400 // avoids interference between the values live in and out of the block and
2401 // eliminates a copy inside the loop.
2402 static void initVRegCycle(SUnit *SU) {
2403  if (DisableSchedVRegCycle)
2404  return;
2405 
2406  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2407  return;
2408 
2409  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2410 
2411  SU->isVRegCycle = true;
2412 
2413  for (const SDep &Pred : SU->Preds) {
2414  if (Pred.isCtrl()) continue;
2415  Pred.getSUnit()->isVRegCycle = true;
2416  }
2417 }
2418 
2419 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2420 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2421 static void resetVRegCycle(SUnit *SU) {
2422  if (!SU->isVRegCycle)
2423  return;
2424 
2425  for (const SDep &Pred : SU->Preds) {
2426  if (Pred.isCtrl()) continue; // ignore chain preds
2427  SUnit *PredSU = Pred.getSUnit();
2428  if (PredSU->isVRegCycle) {
2429  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2430  "VRegCycle def must be CopyFromReg");
2431  Pred.getSUnit()->isVRegCycle = false;
2432  }
2433  }
2434 }
2435 
2436 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2437 // means a node that defines the VRegCycle has not been scheduled yet.
2438 static bool hasVRegCycleUse(const SUnit *SU) {
2439  // If this SU also defines the VReg, don't hoist it as a "use".
2440  if (SU->isVRegCycle)
2441  return false;
2442 
2443  for (const SDep &Pred : SU->Preds) {
2444  if (Pred.isCtrl()) continue; // ignore chain preds
2445  if (Pred.getSUnit()->isVRegCycle &&
2446  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2447  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2448  return true;
2449  }
2450  }
2451  return false;
2452 }
2453 
2454 // Check for either a dependence (latency) or resource (hazard) stall.
2455 //
2456 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2457 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2458  if ((int)SPQ->getCurCycle() < Height) return true;
2459  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2460  != ScheduleHazardRecognizer::NoHazard)
2461  return true;
2462  return false;
2463 }
2464 
2465 // Return -1 if left has higher priority, 1 if right has higher priority.
2466 // Return 0 if latency-based priority is equivalent.
2467 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2468  RegReductionPQBase *SPQ) {
2469  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2470  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2471  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2472  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2473  int LHeight = (int)left->getHeight() + LPenalty;
2474  int RHeight = (int)right->getHeight() + RPenalty;
2475 
2476  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2477  BUHasStall(left, LHeight, SPQ);
2478  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2479  BUHasStall(right, RHeight, SPQ);
2480 
2481  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2482  // If scheduling both of the nodes will cause pipeline stalls, sort
2483  // them according to their height.
2484  if (LStall) {
2485  if (!RStall)
2486  return 1;
2487  if (LHeight != RHeight)
2488  return LHeight > RHeight ? 1 : -1;
2489  } else if (RStall)
2490  return -1;
2491 
2492  // If either node is scheduling for latency, sort them by height/depth
2493  // and latency.
2494  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2495  right->SchedulingPref == Sched::ILP)) {
2496  // If neither instruction stalls (!LStall && !RStall) and the HazardRecognizer
2497  // is enabled, grouping instructions by cycle, then height is already
2498  // covered and only depth matters. We also reach this point if both stall
2499  // but have the same height.
2500  if (!SPQ->getHazardRec()->isEnabled()) {
2501  if (LHeight != RHeight)
2502  return LHeight > RHeight ? 1 : -1;
2503  }
2504  int LDepth = left->getDepth() - LPenalty;
2505  int RDepth = right->getDepth() - RPenalty;
2506  if (LDepth != RDepth) {
2507  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2508  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2509  << ") depth " << RDepth << "\n");
2510  return LDepth < RDepth ? 1 : -1;
2511  }
2512  if (left->Latency != right->Latency)
2513  return left->Latency > right->Latency ? 1 : -1;
2514  }
2515  return 0;
2516 }
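// Editor's worked example: if left is at height 5 and would stall while
// right is at height 3 and is stall-free, the LStall/!RStall case above
// returns 1, delaying left. If both stall at the same height, the code falls
// through to the depth comparison and returns 1 when left is shallower
// (LDepth < RDepth).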
2517 
2518 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2519  // Schedule physical register definitions close to their use. This is
2520  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2521  // long as shortening physreg live ranges is generally good, we can defer
2522  // creating a subtarget hook.
2523  if (!DisableSchedPhysRegJoin) {
2524  bool LHasPhysReg = left->hasPhysRegDefs;
2525  bool RHasPhysReg = right->hasPhysRegDefs;
2526  if (LHasPhysReg != RHasPhysReg) {
2527  #ifndef NDEBUG
2528  static const char *const PhysRegMsg[] = { " has no physreg",
2529  " defines a physreg" };
2530  #endif
2531  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2532  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2533  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2534  return LHasPhysReg < RHasPhysReg;
2535  }
2536  }
2537 
2538  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2539  unsigned LPriority = SPQ->getNodePriority(left);
2540  unsigned RPriority = SPQ->getNodePriority(right);
2541 
2542  // Be really careful about hoisting call operands above previous calls.
2543  // Only allow it if it would reduce register pressure.
2544  if (left->isCall && right->isCallOp) {
2545  unsigned RNumVals = right->getNode()->getNumValues();
2546  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2547  }
2548  if (right->isCall && left->isCallOp) {
2549  unsigned LNumVals = left->getNode()->getNumValues();
2550  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2551  }
2552 
2553  if (LPriority != RPriority)
2554  return LPriority > RPriority;
2555 
2556  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2557  // the same, then keep source order.
2558  if (left->isCall || right->isCall) {
2559  unsigned LOrder = SPQ->getNodeOrdering(left);
2560  unsigned ROrder = SPQ->getNodeOrdering(right);
2561 
2562  // Prefer an ordering where the lower the non-zero order number, the higher
2563  // the preference.
2564  if ((LOrder || ROrder) && LOrder != ROrder)
2565  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2566  }
2567 
2568  // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2569  // e.g.
2570  // t1 = op t2, c1
2571  // t3 = op t4, c2
2572  //
2573  // and the following instructions are both ready.
2574  // t2 = op c3
2575  // t4 = op c4
2576  //
2577  // Then schedule t2 = op first.
2578  // i.e.
2579  // t4 = op c4
2580  // t2 = op c3
2581  // t1 = op t2, c1
2582  // t3 = op t4, c2
2583  //
2584  // This creates more short live intervals.
2585  unsigned LDist = closestSucc(left);
2586  unsigned RDist = closestSucc(right);
2587  if (LDist != RDist)
2588  return LDist < RDist;
2589 
2590  // How many registers become live when the node is scheduled.
2591  unsigned LScratch = calcMaxScratches(left);
2592  unsigned RScratch = calcMaxScratches(right);
2593  if (LScratch != RScratch)
2594  return LScratch > RScratch;
2595 
2596  // Comparing latency against a call makes little sense unless the node
2597  // is register pressure-neutral.
2598  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2599  return (left->NodeQueueId > right->NodeQueueId);
2600 
2601  // Do not compare latencies when one or both of the nodes are calls.
2602  if (!DisableSchedCycles &&
2603  !(left->isCall || right->isCall)) {
2604  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2605  if (result != 0)
2606  return result > 0;
2607  }
2608  else {
2609  if (left->getHeight() != right->getHeight())
2610  return left->getHeight() > right->getHeight();
2611 
2612  if (left->getDepth() != right->getDepth())
2613  return left->getDepth() < right->getDepth();
2614  }
2615 
2616  assert(left->NodeQueueId && right->NodeQueueId &&
2617  "NodeQueueId cannot be zero");
2618  return (left->NodeQueueId > right->NodeQueueId);
2619 }
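// Editor's summary of the tie-break ladder in BURRSort above, in order:
// physreg defs (unless disabled) -> Sethi-Ullman priority, with call
// operands discounted -> source order when calls are involved -> distance to
// the closest successor -> scratch-register estimate -> latency via
// BUCompareLatency (or raw height/depth when cycles are disabled or calls
// are involved) -> NodeQueueId as the final deterministic tie-break.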
2620 
2621 // Bottom up
2622 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2623  if (int res = checkSpecialNodes(left, right))
2624  return res > 0;
2625 
2626  return BURRSort(left, right, SPQ);
2627 }
2628 
2629 // Source order, otherwise bottom up.
2630 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2631  if (int res = checkSpecialNodes(left, right))
2632  return res > 0;
2633 
2634  unsigned LOrder = SPQ->getNodeOrdering(left);
2635  unsigned ROrder = SPQ->getNodeOrdering(right);
2636 
2637  // Prefer an ordering where the lower the non-zero order number, the higher
2638  // the preference.
2639  if ((LOrder || ROrder) && LOrder != ROrder)
2640  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2641 
2642  return BURRSort(left, right, SPQ);
2643 }
2644 
2645 // If the time between now and when the instruction will be ready can cover
2646 // the spill code, then avoid adding it to the ready queue. This gives long
2647 // stalls highest priority and allows hoisting across calls. It should also
2648 // speed up processing the available queue.
2649 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2650  static const unsigned ReadyDelay = 3;
2651 
2652  if (SPQ->MayReduceRegPressure(SU)) return true;
2653 
2654  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2655 
2656  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2657  != ScheduleHazardRecognizer::NoHazard)
2658  return false;
2659 
2660  return true;
2661 }
2662 
2663 // Return true if right should be scheduled with higher priority than left.
2664 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2665  if (int res = checkSpecialNodes(left, right))
2666  return res > 0;
2667 
2668  if (left->isCall || right->isCall)
2669  // No way to compute latency of calls.
2670  return BURRSort(left, right, SPQ);
2671 
2672  bool LHigh = SPQ->HighRegPressure(left);
2673  bool RHigh = SPQ->HighRegPressure(right);
2674  // Avoid causing spills. If register pressure is high, schedule for
2675  // register pressure reduction.
2676  if (LHigh && !RHigh) {
2677  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2678  << right->NodeNum << ")\n");
2679  return true;
2680  }
2681  else if (!LHigh && RHigh) {
2682  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2683  << left->NodeNum << ")\n");
2684  return false;
2685  }
2686  if (!LHigh && !RHigh) {
2687  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2688  if (result != 0)
2689  return result > 0;
2690  }
2691  return BURRSort(left, right, SPQ);
2692 }
2693 
2694 // Schedule as many instructions in each cycle as possible, so don't make an
2695 // instruction available unless it is ready in the current cycle.
2696 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2697  if (SU->getHeight() > CurCycle) return false;
2698 
2699  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2700  != ScheduleHazardRecognizer::NoHazard)
2701  return false;
2702 
2703  return true;
2704 }
2705 
2706 static bool canEnableCoalescing(SUnit *SU) {
2707  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2708  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2709  // CopyToReg should be close to its uses to facilitate coalescing and
2710  // avoid spilling.
2711  return true;
2712 
2713  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2714  Opc == TargetOpcode::SUBREG_TO_REG ||
2715  Opc == TargetOpcode::INSERT_SUBREG)
2716  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2717  // close to their uses to facilitate coalescing.
2718  return true;
2719 
2720  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2721  // If SU does not have a register def, schedule it close to its uses
2722  // because it does not lengthen any live ranges.
2723  return true;
2724 
2725  return false;
2726 }
2727 
2728 // list-ilp is currently an experimental scheduler that allows various
2729 // heuristics to be enabled prior to the normal register reduction logic.
2730 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2731  if (int res = checkSpecialNodes(left, right))
2732  return res > 0;
2733 
2734  if (left->isCall || right->isCall)
2735  // No way to compute latency of calls.
2736  return BURRSort(left, right, SPQ);
2737 
2738  unsigned LLiveUses = 0, RLiveUses = 0;
2739  int LPDiff = 0, RPDiff = 0;
2740  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2741  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2742  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2743  }
2744  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2745  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2746  << "): " << LPDiff << " != SU(" << right->NodeNum
2747  << "): " << RPDiff << "\n");
2748  return LPDiff > RPDiff;
2749  }
2750 
2751  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2752  bool LReduce = canEnableCoalescing(left);
2753  bool RReduce = canEnableCoalescing(right);
2754  if (LReduce && !RReduce) return false;
2755  if (RReduce && !LReduce) return true;
2756  }
2757 
2758  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2759  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2760  << " != SU(" << right->NodeNum << "): " << RLiveUses
2761  << "\n");
2762  return LLiveUses < RLiveUses;
2763  }
2764 
2765  if (!DisableSchedStalls) {
2766  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2767  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2768  if (LStall != RStall)
2769  return left->getHeight() > right->getHeight();
2770  }
2771 
2772  if (!DisableSchedCriticalPath) {
2773  int spread = (int)left->getDepth() - (int)right->getDepth();
2774  if (std::abs(spread) > MaxReorderWindow) {
2775  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2776  << left->getDepth() << " != SU(" << right->NodeNum
2777  << "): " << right->getDepth() << "\n");
2778  return left->getDepth() < right->getDepth();
2779  }
2780  }
2781 
2782  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2783  int spread = (int)left->getHeight() - (int)right->getHeight();
2784  if (std::abs(spread) > MaxReorderWindow)
2785  return left->getHeight() > right->getHeight();
2786  }
2787 
2788  return BURRSort(left, right, SPQ);
2789 }
2790 
2791 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2792  SUnits = &sunits;
2793  // Add pseudo dependency edges for two-address nodes.
2794  if (!Disable2AddrHack)
2795  AddPseudoTwoAddrDeps();
2796  // Reroute edges to nodes with multiple uses.
2797  if (!TracksRegPressure && !SrcOrder)
2798  PrescheduleNodesWithMultipleUses();
2799  // Calculate node priorities.
2800  CalculateSethiUllmanNumbers();
2801 
2802  // For single block loops, mark nodes that look like canonical IV increments.
2803  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2804  for (SUnit &SU : sunits)
2805  initVRegCycle(&SU);
2806 }
2807 
2808 //===----------------------------------------------------------------------===//
2809 // Preschedule for Register Pressure
2810 //===----------------------------------------------------------------------===//
2811 
2812 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2813  if (SU->isTwoAddress) {
2814  unsigned Opc = SU->getNode()->getMachineOpcode();
2815  const MCInstrDesc &MCID = TII->get(Opc);
2816  unsigned NumRes = MCID.getNumDefs();
2817  unsigned NumOps = MCID.getNumOperands() - NumRes;
2818  for (unsigned i = 0; i != NumOps; ++i) {
2819  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2820  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2821  if (DU->getNodeId() != -1 &&
2822  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2823  return true;
2824  }
2825  }
2826  }
2827  return false;
2828 }
2829 
2830 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2831 /// successors' explicit physregs whose definition can reach DepSU.
2832 /// i.e. DepSU should not be scheduled above SU.
2833 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2834  ScheduleDAGRRList *scheduleDAG,
2835  const TargetInstrInfo *TII,
2836  const TargetRegisterInfo *TRI) {
2837  const MCPhysReg *ImpDefs
2838  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2839  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2840  if (!ImpDefs && !RegMask)
2841  return false;
2842 
2843  for (const SDep &Succ : SU->Succs) {
2844  SUnit *SuccSU = Succ.getSUnit();
2845  for (const SDep &SuccPred : SuccSU->Preds) {
2846  if (!SuccPred.isAssignedRegDep())
2847  continue;
2848 
2849  if (RegMask &&
2850  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2851  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2852  return true;
2853 
2854  if (ImpDefs)
2855  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2856  // Return true if SU clobbers this physical register use and the
2857  // definition of the register reaches from DepSU. IsReachable queries
2858  // a topological forward sort of the DAG (following the successors).
2859  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2860  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2861  return true;
2862  }
2863  }
2864  return false;
2865 }
2866 
2867 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2868 /// physical register defs.
2869 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2870  const TargetInstrInfo *TII,
2871  const TargetRegisterInfo *TRI) {
2872  SDNode *N = SuccSU->getNode();
2873  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2874  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2875  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2876  for (const SDNode *SUNode = SU->getNode(); SUNode;
2877  SUNode = SUNode->getGluedNode()) {
2878  if (!SUNode->isMachineOpcode())
2879  continue;
2880  const MCPhysReg *SUImpDefs =
2881  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2882  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2883  if (!SUImpDefs && !SURegMask)
2884  continue;
2885  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2886  MVT VT = N->getSimpleValueType(i);
2887  if (VT == MVT::Glue || VT == MVT::Other)
2888  continue;
2889  if (!N->hasAnyUseOfValue(i))
2890  continue;
2891  unsigned Reg = ImpDefs[i - NumDefs];
2892  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2893  return true;
2894  if (!SUImpDefs)
2895  continue;
2896  for (;*SUImpDefs; ++SUImpDefs) {
2897  unsigned SUReg = *SUImpDefs;
2898  if (TRI->regsOverlap(Reg, SUReg))
2899  return true;
2900  }
2901  }
2902  }
2903  return false;
2904 }
2905 
2906 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2907 /// are not handled well by the general register pressure reduction
2908 /// heuristics. When presented with code like this:
2909 ///
2910 /// N
2911 /// / |
2912 /// / |
2913 /// U store
2914 /// |
2915 /// ...
2916 ///
2917 /// the heuristics tend to push the store up, but since the
2918 /// operand of the store has another use (U), this would increase
2919 /// the length of that other use (the U->N edge).
2920 ///
2921 /// This function transforms code like the above to route U's
2922 /// dependence through the store when possible, like this:
2923 ///
2924 /// N
2925 /// ||
2926 /// ||
2927 /// store
2928 /// |
2929 /// U
2930 /// |
2931 /// ...
2932 ///
2933 /// This results in the store being scheduled immediately
2934 /// after N, which shortens the U->N live range, reducing
2935 /// register pressure.
2936 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2937  // Visit all the nodes in topological order, working top-down.
2938  for (SUnit &SU : *SUnits) {
2939  // For now, only look at nodes with no data successors, such as stores.
2940  // These are especially important, due to the heuristics in
2941  // getNodePriority for nodes with no data successors.
2942  if (SU.NumSuccs != 0)
2943  continue;
2944  // For now, only look at nodes with exactly one data predecessor.
2945  if (SU.NumPreds != 1)
2946  continue;
2947  // Avoid prescheduling copies to virtual registers, which don't behave
2948  // like other nodes from the perspective of scheduling heuristics.
2949  if (SDNode *N = SU.getNode())
2950  if (N->getOpcode() == ISD::CopyToReg &&
2951  TargetRegisterInfo::isVirtualRegister
2952  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2953  continue;
2954 
2955  SDNode *PredFrameSetup = nullptr;
2956  for (const SDep &Pred : SU.Preds)
2957  if (Pred.isCtrl() && Pred.getSUnit()) {
2958  // Find a predecessor that is not a data dependence.
2959  SDNode *PredND = Pred.getSUnit()->getNode();
2960 
2961  // If PredND is a FrameSetup, we should not preschedule the node;
2962  // otherwise, during bottom-up scheduling, ADJCALLSTACKDOWN and
2963  // ADJCALLSTACKUP may hold the CallResource too long and prevent
2964  // other calls from being scheduled. If there's no other available
2965  // node to schedule, the scheduler will try to rename the register
2966  // by creating a copy to avoid the conflict, which will fail because
2967  // CallResource is not a real physical register.
2968  if (PredND && PredND->isMachineOpcode() &&
2969  (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2970  PredFrameSetup = PredND;
2971  break;
2972  }
2973  }
2974  // Skip the node if it has a FrameSetup parent.
2975  if (PredFrameSetup != nullptr)
2976  continue;
2977 
2978  // Locate the single data predecessor.
2979  SUnit *PredSU = nullptr;
2980  for (const SDep &Pred : SU.Preds)
2981  if (!Pred.isCtrl()) {
2982  PredSU = Pred.getSUnit();
2983  break;
2984  }
2985  assert(PredSU);
2986 
2987  // Don't rewrite edges that carry physregs, because that requires additional
2988  // support infrastructure.
2989  if (PredSU->hasPhysRegDefs)
2990  continue;
2991  // Short-circuit the case where SU is PredSU's only data successor.
2992  if (PredSU->NumSuccs == 1)
2993  continue;
2994  // Avoid prescheduling to copies from virtual registers, which don't behave
2995  // like other nodes from the perspective of scheduling heuristics.
2996  if (SDNode *N = SU.getNode())
2997  if (N->getOpcode() == ISD::CopyFromReg &&
2998  TargetRegisterInfo::isVirtualRegister
2999  (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
3000  continue;
3001 
3002  // Perform checks on the successors of PredSU.
3003  for (const SDep &PredSucc : PredSU->Succs) {
3004  SUnit *PredSuccSU = PredSucc.getSUnit();
3005  if (PredSuccSU == &SU) continue;
3006  // If PredSU has another successor with no data successors, for
3007  // now don't attempt to choose either over the other.
3008  if (PredSuccSU->NumSuccs == 0)
3009  goto outer_loop_continue;
3010  // Don't break physical register dependencies.
3011  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3012  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3013  goto outer_loop_continue;
3014  // Don't introduce graph cycles.
3015  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3016  goto outer_loop_continue;
3017  }
3018 
3019  // Ok, the transformation is safe and the heuristics suggest it is
3020  // profitable. Update the graph.
3021  LLVM_DEBUG(
3022  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3023  << PredSU->NodeNum
3024  << " to guide scheduling in the presence of multiple uses\n");
3025  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3026  SDep Edge = PredSU->Succs[i];
3027  assert(!Edge.isAssignedRegDep());
3028  SUnit *SuccSU = Edge.getSUnit();
3029  if (SuccSU != &SU) {
3030  Edge.setSUnit(PredSU);
3031  scheduleDAG->RemovePred(SuccSU, Edge);
3032  scheduleDAG->AddPredQueued(&SU, Edge);
3033  Edge.setSUnit(&SU);
3034  scheduleDAG->AddPredQueued(SuccSU, Edge);
3035  --i;
3036  }
3037  }
3038  outer_loop_continue:;
3039  }
3040 }
3041 
3042 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3043 /// it as a def&use operand, add a pseudo control edge from it to the other
3044 /// node (if it won't create a cycle) so the two-address one will be scheduled
3045 /// first (lower in the schedule). If both nodes are two-address, favor the
3046 /// one that has a CopyToReg use (more likely to be a loop induction update).
3047 /// If both are two-address, but one is commutable while the other is not
3048 /// commutable, favor the one that's not commutable.
3049 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3050  for (SUnit &SU : *SUnits) {
3051  if (!SU.isTwoAddress)
3052  continue;
3053 
3054  SDNode *Node = SU.getNode();
3055  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3056  continue;
3057 
3058  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3059  unsigned Opc = Node->getMachineOpcode();
3060  const MCInstrDesc &MCID = TII->get(Opc);
3061  unsigned NumRes = MCID.getNumDefs();
3062  unsigned NumOps = MCID.getNumOperands() - NumRes;
3063  for (unsigned j = 0; j != NumOps; ++j) {
3064  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3065  continue;
3066  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3067  if (DU->getNodeId() == -1)
3068  continue;
3069  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3070  if (!DUSU)
3071  continue;
3072  for (const SDep &Succ : DUSU->Succs) {
3073  if (Succ.isCtrl())
3074  continue;
3075  SUnit *SuccSU = Succ.getSUnit();
3076  if (SuccSU == &SU)
3077  continue;
3078  // Be conservative. Ignore if nodes aren't at roughly the same
3079  // depth and height.
3080  if (SuccSU->getHeight() < SU.getHeight() &&
3081  (SU.getHeight() - SuccSU->getHeight()) > 1)
3082  continue;
3083  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3084  // constrains whatever is using the copy, instead of the copy
3085  // itself. In the case that the copy is coalesced, this
3086  // preserves the intent of the pseudo two-address heuristics.
3087  while (SuccSU->Succs.size() == 1 &&
3088  SuccSU->getNode()->isMachineOpcode() &&
3089  SuccSU->getNode()->getMachineOpcode() ==
3090  TargetOpcode::COPY_TO_REGCLASS)
3091  SuccSU = SuccSU->Succs.front().getSUnit();
3092  // Don't constrain non-instruction nodes.
3093  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3094  continue;
3095  // Don't constrain nodes with physical register defs if the
3096  // predecessor can clobber them.
3097  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3098  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3099  continue;
3100  }
3101  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3102  // these may be coalesced away. We want them close to their uses.
3103  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3104  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3105  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3106  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3107  continue;
3108  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3109  (!canClobber(SuccSU, DUSU) ||
3110  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3111  (!SU.isCommutable && SuccSU->isCommutable)) &&
3112  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3113  LLVM_DEBUG(dbgs()
3114  << " Adding a pseudo-two-addr edge from SU #"
3115  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3116  scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3117  }
3118  }
3119  }
3120  }
3121 }
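// Editor's sketch: how the MCOI::TIED_TO query above identifies two-address
// operands. For an instruction of the form "X = ADD X, Y" whose MCInstrDesc
// ties use operand 0 to def 0, the constraint lookup returns the def index
// (0) rather than -1.
#if 0
const MCInstrDesc &MCID = TII->get(Opc);
unsigned NumRes = MCID.getNumDefs();
for (unsigned j = 0, e = MCID.getNumOperands() - NumRes; j != e; ++j)
  if (MCID.getOperandConstraint(j + NumRes, MCOI::TIED_TO) != -1) {
    // Use operand j is read and written through the same register.
  }
#endif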

//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
      new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
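// Editorial usage note: this factory is normally reached via the
// RegisterScheduler registration at the top of this file; for example,
// "llc -pre-RA-sched=list-burr" selects createBURRListDAGScheduler.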

ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
      new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  HybridBURRPriorityQueue *PQ =
      new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  ILPBURRPriorityQueue *PQ =
      new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}
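// Editorial summary, inferred from the constructor calls above: list-burr and
// "source" build latency-free schedulers that do not track register pressure
// (the "source" queue additionally breaks ties in source order), while
// list-hybrid and list-ilp both track register pressure and request latency
// information, differing only in their priority-queue comparison heuristics.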