//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms. The basic approach uses a priority
// queue of available nodes to schedule. One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
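
// A rough sketch of the core loop implemented in this file (illustrative
// only; the names below are simplified stand-ins, not the actual interfaces
// used by the scheduler):
//
//   while (!Available.empty()) {
//     SUnit *SU = Available.pop();     // highest-priority ready node
//     if (!isLegalToSchedule(SU)) {    // e.g. physreg interference
//       deferOrResolve(SU);            // delay, backtrack, or copy/clone
//       continue;
//     }
//     emit(SU);                        // append to the schedule
//     releasePredecessors(SU);         // may make new nodes available
//   }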

#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);

static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

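// A scheduler registered above is selected with llc's -pre-RA-sched option;
// for example (an illustrative invocation, not exercised by this file):
//   llc -pre-RA-sched=list-ilp test.ll
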
static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

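// The list-ilp heuristics can likewise be tuned from the command line; a
// hypothetical experiment might combine the hidden flags above, e.g.:
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 -sched-avg-ipc=2 test.ll
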
namespace {

//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit *> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count of instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other nodes
  /// that modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::unique_ptr<SUnit*[]> LiveRegDefs;
  std::unique_ptr<SUnit*[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;

  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;

  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue),
      Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
  /// Does *NOT* update the topological ordering! It just queues an update.
  void AddPredQueued(SUnit *SU, const SDep &D) {
    Topo.AddPredQueued(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
           AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.AddSUnitWithoutPredecessors(NewNode);
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};

} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
                    << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle =
      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  LLVM_DEBUG(dump());
  Topo.MarkDirty();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue, avoid
    // adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  while (true) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
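///
/// An illustrative sketch of a nested call sequence, walked bottom-up from t3:
///
///   t0: CALLSEQ_BEGIN   ; NestLevel returns to 0 here: this is the match
///   t1: CALLSEQ_BEGIN   ; --NestLevel (inner sequence, seen second)
///   t2: CALLSEQ_END     ; ++NestLevel (inner sequence, seen first)
///   t3: CALLSEQ_END     ; start of the walk, ++NestLevel
///
/// The nesting count is what lets the walk skip the inner t1/t2 pair rather
/// than mistaking t1 for the match.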
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  while (true) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
                      << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
         "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
/// its own and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGens only if it was empty before this unscheduling.
      // This avoids incorrectly overwriting an entry that was set by a
      // previous unscheduling run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack the schedule, unscheduling nodes until
/// reaching BtSU, in order to make it possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// TryUnfoldSU - Attempt to unfold SU's node, splitting a folded memory
/// operand out into a separate load node.
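/// For example (a hypothetical x86-flavored sketch, not code from this file):
///   %r = ADD32rm %a, <mem>       ; load folded into the add
/// may be unfolded into
///   %t = MOV32rm <mem>           ; becomes the new load SUnit
///   %r = ADD32rr %a, %t          ; becomes the new compute SUnit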
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  // Use while over if to ease fall through.
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // Unfolding an x86 DEC64m operation results in store, dec, load, which
  // can't be handled here, so quit.
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location that produces the same type of value,
  // but with different alignment or volatility.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  bool isNewN = true;
  SUnit *NewSU;
  // This can only happen when isNewLoad is false.
  if (N->getNodeId() != -1) {
    NewSU = &SUnits[N->getNodeId()];
    // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit to unfolding so just return SU.
    if (NewSU->isScheduled) {
      return SU;
    }
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);
  }

  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  // Record all the edges to and from the old SU, by category.
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPredQueued(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPredQueued(NewSU, Pred);
  }
  for (SDep &D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPredQueued(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep &D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPredQueued(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPredQueued(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  if (isNewN)
    AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
  LLVM_DEBUG(dumpNode(*SU));

  if (N->getGluedNode() &&
      !TII->canCopyGluedNodeDuringSchedule(N)) {
    LLVM_DEBUG(
        dbgs()
        << "Giving up because it has incoming glue and the target does not "
           "want to copy it\n");
    return nullptr;
  }

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue) {
      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
      return nullptr;
    } else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
      LLVM_DEBUG(
          dbgs() << "Giving up because one of the operands is glue and "
                    "the target does not want to copy it\n");
      return nullptr;
    }
  }

  // If possible, unfold the instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled, don't bother duplicating; just return.
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPredQueued(NewSU, Pred);

  // Make sure the clone comes after the original. (InstrEmitter assumes
  // this ordering.)
  AddPredQueued(NewSU, SDep(SU, SDep::Artificial));

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPredQueued(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.emplace_back(SuccSU, D);
    }
  }
  for (const auto &[DelSU, DelD] : DelDeps)
    RemovePred(DelSU, DelD);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
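///
/// Schematically, the resulting dependence chain looks like (a rough sketch):
///
///   SU (defines Reg in SrcRC)
///     -> CopyFromSU (copies the value out of SrcRC into DestRC)
///     -> CopyToSU   (copies the value back into SrcRC)
///     -> previously scheduled successors of SU (moved onto CopyToSU)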
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPredQueued(SuccSU, D);
      DelDeps.emplace_back(SuccSU, Succ);
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise,
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (const auto &[DelSU, DelD] : DelDeps)
    RemovePred(DelSU, DelD);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPredQueued(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPredQueued(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}

/// CheckForLiveRegDef - Update the set of interfering live registers (LRegs)
/// if the specified register def of the specified SUnit clobbers any "live"
/// registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI,
                               const SDNode *Node = nullptr) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Allow multiple uses of the same def.
    if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
      continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM ||
        Node->getOpcode() == ISD::INLINEASM_BR) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (Reg.isPhysical())
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs,
                                 TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (Node->getOpcode() == ISD::CopyToReg) {
      Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      if (Reg.isPhysical()) {
        SDNode *SrcNode = Node->getOperand(2).getNode();
        CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
                           SrcNode);
      }
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               makeArrayRef(LiveRegDefs.get(),
                                            TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.OpInfo[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

1472 /// Return a node that can be scheduled in this cycle. Requirements:
1473 /// (1) Ready: latency has been satisfied
1474 /// (2) No Hazards: resources are available
1475 /// (3) No Interferences: may unschedule to break register interferences.
1476 SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1477  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1478  auto FindAvailableNode = [&]() {
1479  while (CurSU) {
1481  if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1482  break;
1483  LLVM_DEBUG(dbgs() << " Interfering reg ";
1484  if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1485  else dbgs() << printReg(LRegs[0], TRI);
1486  dbgs() << " SU #" << CurSU->NodeNum << '\n');
1487  auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1488  if (LRegsInserted) {
1489  CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1490  Interferences.push_back(CurSU);
1491  }
1492  else {
1493  assert(CurSU->isPending && "Interferences are pending");
1494  // Update the interference with current live regs.
1495  LRegsIter->second = LRegs;
1496  }
1497  CurSU = AvailableQueue->pop();
1498  }
1499  };
1500  FindAvailableNode();
1501  if (CurSU)
1502  return CurSU;
1503 
1504  // We query the topological order in the loop body, so make sure outstanding
1505  // updates are applied before entering it (we only enter the loop if there
1506  // are some interferences). If we make changes to the ordering, we exit
1507  // the loop.
1508 
1509  // All candidates are delayed due to live physical reg dependencies.
1510  // Try backtracking, code duplication, or inserting cross class copies
1511  // to resolve it.
1512  for (SUnit *TrySU : Interferences) {
1513  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1514 
1515  // Try unscheduling up to the point where it's safe to schedule
1516  // this node.
1517  SUnit *BtSU = nullptr;
1518  unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1519  for (unsigned Reg : LRegs) {
1520  if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1521  BtSU = LiveRegGens[Reg];
1522  LiveCycle = BtSU->getHeight();
1523  }
1524  }
1525  if (!WillCreateCycle(TrySU, BtSU)) {
1526  // BacktrackBottomUp mutates Interferences!
1527  BacktrackBottomUp(TrySU, BtSU);
1528 
1529  // Force the current node to be scheduled before the node that
1530  // requires the physical reg dep.
1531  if (BtSU->isAvailable) {
1532  BtSU->isAvailable = false;
1533  if (!BtSU->isPending)
1534  AvailableQueue->remove(BtSU);
1535  }
1536  LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1537  << ") to SU(" << TrySU->NodeNum << ")\n");
1538  AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1539 
1540  // If one or more successors has been unscheduled, then the current
1541  // node is no longer available.
1542  if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1543  LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1544  CurSU = AvailableQueue->pop();
1545  } else {
1546  LLVM_DEBUG(dbgs() << "TrySU available\n");
1547  // Available and in AvailableQueue
1548  AvailableQueue->remove(TrySU);
1549  CurSU = TrySU;
1550  }
1551  FindAvailableNode();
1552  // Interferences has been mutated. We must break.
1553  break;
1554  }
1555  }
1556 
1557  if (!CurSU) {
1558  // Can't backtrack. If it's too expensive to copy the value, then try
1559  // duplicate the nodes that produces these "too expensive to copy"
1560  // values to break the dependency. In case even that doesn't work,
1561  // insert cross class copies.
1562  // If it's not too expensive, i.e. cost != -1, issue copies.
1563  SUnit *TrySU = Interferences[0];
1564  SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1565  assert(LRegs.size() == 1 && "Can't handle this yet!");
1566  unsigned Reg = LRegs[0];
1567  SUnit *LRDef = LiveRegDefs[Reg];
1568  MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1569  const TargetRegisterClass *RC =
1571  const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1572 
1573  // If cross copy register class is the same as RC, then it must be possible
1574  // copy the value directly. Do not try duplicate the def.
1575  // If cross copy register class is not the same as RC, then it's possible to
1576  // copy the value but it require cross register class copies and it is
1577  // expensive.
1578  // If cross copy register class is null, then it's not possible to copy
1579  // the value at all.
1580  SUnit *NewDef = nullptr;
1581  if (DestRC != RC) {
1582  NewDef = CopyAndMoveSuccessors(LRDef);
1583  if (!DestRC && !NewDef)
1584  report_fatal_error("Can't handle live physical register dependency!");
1585  }
1586  if (!NewDef) {
1587  // Issue copies, these can be expensive cross register class copies.
1589  InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1590  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1591  << " to SU #" << Copies.front()->NodeNum << "\n");
1592  AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1593  NewDef = Copies.back();
1594  }
1595 
1596  LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1597  << " to SU #" << TrySU->NodeNum << "\n");
1598  LiveRegDefs[Reg] = NewDef;
1599  AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1600  TrySU->isAvailable = false;
1601  CurSU = NewDef;
1602  }
1603  assert(CurSU && "Unable to resolve live physical register dependencies!");
1604  return CurSU;
1605 }
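// Informal recap of the ladder above: (1) backtrack below the interfering
// physreg def whenever that cannot create a cycle; (2) otherwise, when a
// plain same-class copy will not do (the cross-copy class differs or is
// null), try to clone the defining node via CopyAndMoveSuccessors; (3) as a
// last resort, issue cross-register-class copies with
// InsertCopiesAndMoveSuccs, adding artificial edges so the copies are
// scheduled in the required order.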
1606 
1607 /// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1608 /// schedulers.
1609 void ScheduleDAGRRList::ListScheduleBottomUp() {
1610  // Release any predecessors of the special Exit node.
1611  ReleasePredecessors(&ExitSU);
1612 
1613  // Add root to Available queue.
1614  if (!SUnits.empty()) {
1615  SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1616  assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1617  RootSU->isAvailable = true;
1618  AvailableQueue->push(RootSU);
1619  }
1620 
1621  // While Available queue is not empty, grab the node with the highest
1622  // priority. If it is not ready, put it back. Schedule the node.
1623  Sequence.reserve(SUnits.size());
1624  while (!AvailableQueue->empty() || !Interferences.empty()) {
1625  LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1626  AvailableQueue->dump(this));
1627 
1628  // Pick the best node to schedule taking all constraints into
1629  // consideration.
1630  SUnit *SU = PickNodeToScheduleBottomUp();
1631 
1632  AdvancePastStalls(SU);
1633 
1634  ScheduleNodeBottomUp(SU);
1635 
1636  while (AvailableQueue->empty() && !PendingQueue.empty()) {
1637  // Advance the cycle to free resources. Skip ahead to the next ready SU.
1638  assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1639  "MinAvailableCycle uninitialized");
1640  AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1641  }
1642  }
1643 
1644  // Reverse the order if it is bottom up.
1645  std::reverse(Sequence.begin(), Sequence.end());
1646 
1647 #ifndef NDEBUG
1648  VerifyScheduledSequence(/*isBottomUp=*/true);
1649 #endif
1650 }
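// Informal trace: for a two-node graph where root SU(0) consumes SU(1), the
// loop pops SU(0) first (it is released from ExitSU), then SU(1) once
// scheduling SU(0) releases it, giving Sequence = [SU(0), SU(1)]; the final
// std::reverse yields the emitted order SU(1), SU(0).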
1651 
1652 namespace {
1653 
1654 class RegReductionPQBase;
1655 
1656 struct queue_sort {
1657  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1658 };
1659 
1660 #ifndef NDEBUG
1661 template<class SF>
1662 struct reverse_sort : public queue_sort {
1663  SF &SortFunc;
1664 
1665  reverse_sort(SF &sf) : SortFunc(sf) {}
1666 
1667  bool operator()(SUnit* left, SUnit* right) const {
1668  // reverse left/right rather than simply !SortFunc(left, right)
1669  // to expose different paths in the comparison logic.
1670  return SortFunc(right, left);
1671  }
1672 };
1673 #endif // NDEBUG
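// Why reverse_sort swaps arguments instead of negating (informal note): in
// a strict weak ordering, two equivalent nodes satisfy SortFunc(L, R) ==
// SortFunc(R, L) == false, so !SortFunc(L, R) would declare each "better"
// than the other and break the ordering invariants. SortFunc(right, left)
// remains a valid comparator while still exercising the opposite branch of
// every tie-break when DAG->StressSched is set.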
1674 
1675 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1676 /// reduction scheduler.
1677 struct bu_ls_rr_sort : public queue_sort {
1678  enum {
1679  IsBottomUp = true,
1680  HasReadyFilter = false
1681  };
1682 
1683  RegReductionPQBase *SPQ;
1684 
1685  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1686 
1687  bool operator()(SUnit* left, SUnit* right) const;
1688 };
1689 
1690 // src_ls_rr_sort - Priority function for source order scheduler.
1691 struct src_ls_rr_sort : public queue_sort {
1692  enum {
1693  IsBottomUp = true,
1694  HasReadyFilter = false
1695  };
1696 
1697  RegReductionPQBase *SPQ;
1698 
1699  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1700 
1701  bool operator()(SUnit* left, SUnit* right) const;
1702 };
1703 
1704 // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1705 struct hybrid_ls_rr_sort : public queue_sort {
1706  enum {
1707  IsBottomUp = true,
1708  HasReadyFilter = false
1709  };
1710 
1711  RegReductionPQBase *SPQ;
1712 
1713  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1714 
1715  bool isReady(SUnit *SU, unsigned CurCycle) const;
1716 
1717  bool operator()(SUnit* left, SUnit* right) const;
1718 };
1719 
1720 // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1721 // scheduler.
1722 struct ilp_ls_rr_sort : public queue_sort {
1723  enum {
1724  IsBottomUp = true,
1725  HasReadyFilter = false
1726  };
1727 
1728  RegReductionPQBase *SPQ;
1729 
1730  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1731 
1732  bool isReady(SUnit *SU, unsigned CurCycle) const;
1733 
1734  bool operator()(SUnit* left, SUnit* right) const;
1735 };
1736 
1737 class RegReductionPQBase : public SchedulingPriorityQueue {
1738 protected:
1739  std::vector<SUnit *> Queue;
1740  unsigned CurQueueId = 0;
1741  bool TracksRegPressure;
1742  bool SrcOrder;
1743 
1744  // SUnits - The SUnits for the current graph.
1745  std::vector<SUnit> *SUnits;
1746 
1747  MachineFunction &MF;
1748  const TargetInstrInfo *TII;
1749  const TargetRegisterInfo *TRI;
1750  const TargetLowering *TLI;
1751  ScheduleDAGRRList *scheduleDAG = nullptr;
1752 
1753  // SethiUllmanNumbers - The SethiUllman number for each node.
1754  std::vector<unsigned> SethiUllmanNumbers;
1755 
1756  /// RegPressure - Tracking current reg pressure per register class.
1757  std::vector<unsigned> RegPressure;
1758 
1759  /// RegLimit - Tracking the number of allocatable registers per register
1760  /// class.
1761  std::vector<unsigned> RegLimit;
1762 
1763 public:
1764  RegReductionPQBase(MachineFunction &mf,
1765  bool hasReadyFilter,
1766  bool tracksrp,
1767  bool srcorder,
1768  const TargetInstrInfo *tii,
1769  const TargetRegisterInfo *tri,
1770  const TargetLowering *tli)
1771  : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1772  SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1773  if (TracksRegPressure) {
1774  unsigned NumRC = TRI->getNumRegClasses();
1775  RegLimit.resize(NumRC);
1776  RegPressure.resize(NumRC);
1777  std::fill(RegLimit.begin(), RegLimit.end(), 0);
1778  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1779  for (const TargetRegisterClass *RC : TRI->regclasses())
1780  RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1781  }
1782  }
1783 
1784  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1785  scheduleDAG = scheduleDag;
1786  }
1787 
1788  ScheduleHazardRecognizer* getHazardRec() {
1789  return scheduleDAG->getHazardRec();
1790  }
1791 
1792  void initNodes(std::vector<SUnit> &sunits) override;
1793 
1794  void addNode(const SUnit *SU) override;
1795 
1796  void updateNode(const SUnit *SU) override;
1797 
1798  void releaseState() override {
1799  SUnits = nullptr;
1800  SethiUllmanNumbers.clear();
1801  std::fill(RegPressure.begin(), RegPressure.end(), 0);
1802  }
1803 
1804  unsigned getNodePriority(const SUnit *SU) const;
1805 
1806  unsigned getNodeOrdering(const SUnit *SU) const {
1807  if (!SU->getNode()) return 0;
1808 
1809  return SU->getNode()->getIROrder();
1810  }
1811 
1812  bool empty() const override { return Queue.empty(); }
1813 
1814  void push(SUnit *U) override {
1815  assert(!U->NodeQueueId && "Node in the queue already");
1816  U->NodeQueueId = ++CurQueueId;
1817  Queue.push_back(U);
1818  }
1819 
1820  void remove(SUnit *SU) override {
1821  assert(!Queue.empty() && "Queue is empty!");
1822  assert(SU->NodeQueueId != 0 && "Not in queue!");
1823  std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1824  if (I != std::prev(Queue.end()))
1825  std::swap(*I, Queue.back());
1826  Queue.pop_back();
1827  SU->NodeQueueId = 0;
1828  }
1829 
1830  bool tracksRegPressure() const override { return TracksRegPressure; }
1831 
1832  void dumpRegPressure() const;
1833 
1834  bool HighRegPressure(const SUnit *SU) const;
1835 
1836  bool MayReduceRegPressure(SUnit *SU) const;
1837 
1838  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1839 
1840  void scheduledNode(SUnit *SU) override;
1841 
1842  void unscheduledNode(SUnit *SU) override;
1843 
1844 protected:
1845  bool canClobber(const SUnit *SU, const SUnit *Op);
1846  void AddPseudoTwoAddrDeps();
1847  void PrescheduleNodesWithMultipleUses();
1848  void CalculateSethiUllmanNumbers();
1849 };
1850 
1851 template<class SF>
1852 static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1853  unsigned BestIdx = 0;
1854  // Only compute the cost for the first 1000 items in the queue, to avoid
1855  // excessive compile-times for very large queues.
1856  for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1857  I++)
1858  if (Picker(Q[BestIdx], Q[I]))
1859  BestIdx = I;
1860  SUnit *V = Q[BestIdx];
1861  if (BestIdx + 1 != Q.size())
1862  std::swap(Q[BestIdx], Q.back());
1863  Q.pop_back();
1864  return V;
1865 }
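// Informal behavior sketch: Picker(A, B) == true means B should be
// scheduled with higher priority than A, so the loop above is a linear
// max-scan over the unordered queue. E.g. for Q = {SU5, SU3, SU9} with a
// Picker that favors SU9, BestIdx ends at 2; the winner is swapped to the
// back (if not already there) and popped in O(1). The 1000-entry cap bounds
// compile time on pathologically large ready queues at the cost of some
// precision.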
1866 
1867 template<class SF>
1868 SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1869 #ifndef NDEBUG
1870  if (DAG->StressSched) {
1871  reverse_sort<SF> RPicker(Picker);
1872  return popFromQueueImpl(Q, RPicker);
1873  }
1874 #endif
1875  (void)DAG;
1876  return popFromQueueImpl(Q, Picker);
1877 }
1878 
1879 //===----------------------------------------------------------------------===//
1880 // RegReductionPriorityQueue Definition
1881 //===----------------------------------------------------------------------===//
1882 //
1883 // This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1884 // to reduce register pressure.
1885 //
1886 template<class SF>
1887 class RegReductionPriorityQueue : public RegReductionPQBase {
1888  SF Picker;
1889 
1890 public:
1891  RegReductionPriorityQueue(MachineFunction &mf,
1892  bool tracksrp,
1893  bool srcorder,
1894  const TargetInstrInfo *tii,
1895  const TargetRegisterInfo *tri,
1896  const TargetLowering *tli)
1897  : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1898  tii, tri, tli),
1899  Picker(this) {}
1900 
1901  bool isBottomUp() const override { return SF::IsBottomUp; }
1902 
1903  bool isReady(SUnit *U) const override {
1904  return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1905  }
1906 
1907  SUnit *pop() override {
1908  if (Queue.empty()) return nullptr;
1909 
1910  SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1911  V->NodeQueueId = 0;
1912  return V;
1913  }
1914 
1915 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1916  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1917  // Emulate pop() without clobbering NodeQueueIds.
1918  std::vector<SUnit *> DumpQueue = Queue;
1919  SF DumpPicker = Picker;
1920  while (!DumpQueue.empty()) {
1921  SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1922  dbgs() << "Height " << SU->getHeight() << ": ";
1923  DAG->dumpNode(*SU);
1924  }
1925  }
1926 #endif
1927 };
1928 
1929 using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1930 using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1931 using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1932 using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1933 
1934 } // end anonymous namespace
1935 
1936 //===----------------------------------------------------------------------===//
1937 // Static Node Priority for Register Pressure Reduction
1938 //===----------------------------------------------------------------------===//
1939 
1940 // Check for special nodes that bypass scheduling heuristics.
1941 // Currently this pushes TokenFactor nodes down, but may be used for other
1942 // pseudo-ops as well.
1943 //
1944 // Return -1 to schedule right above left, 1 for left above right.
1945 // Return 0 if no bias exists.
1946 static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1947  bool LSchedLow = left->isScheduleLow;
1948  bool RSchedLow = right->isScheduleLow;
1949  if (LSchedLow != RSchedLow)
1950  return LSchedLow < RSchedLow ? 1 : -1;
1951  return 0;
1952 }
1953 
1954 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1955 /// Smaller number is the higher priority.
1956 static unsigned
1957 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1958  if (SUNumbers[SU->NodeNum] != 0)
1959  return SUNumbers[SU->NodeNum];
1960 
1961  // Use WorkList to avoid stack overflow on excessively large IRs.
1962  struct WorkState {
1963  WorkState(const SUnit *SU) : SU(SU) {}
1964  const SUnit *SU;
1965  unsigned PredsProcessed = 0;
1966  };
1967 
1968  SmallVector<WorkState, 16> WorkList;
1969  WorkList.push_back(SU);
1970  while (!WorkList.empty()) {
1971  auto &Temp = WorkList.back();
1972  auto *TempSU = Temp.SU;
1973  bool AllPredsKnown = true;
1974  // Try to find a non-evaluated pred and push it into the processing stack.
1975  for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1976  auto &Pred = TempSU->Preds[P];
1977  if (Pred.isCtrl()) continue; // ignore chain preds
1978  SUnit *PredSU = Pred.getSUnit();
1979  if (SUNumbers[PredSU->NodeNum] == 0) {
1980 #ifndef NDEBUG
1981  // In debug mode, check that we don't have such an element in the stack.
1982  for (auto It : WorkList)
1983  assert(It.SU != PredSU && "Trying to push an element twice?");
1984 #endif
1985  // Next time, resume processing this node from the next pred.
1986  Temp.PredsProcessed = P + 1;
1987  WorkList.push_back(PredSU);
1988  AllPredsKnown = false;
1989  break;
1990  }
1991  }
1992 
1993  if (!AllPredsKnown)
1994  continue;
1995 
1996  // Once all preds are known, we can calculate the answer for this one.
1997  unsigned SethiUllmanNumber = 0;
1998  unsigned Extra = 0;
1999  for (const SDep &Pred : TempSU->Preds) {
2000  if (Pred.isCtrl()) continue; // ignore chain preds
2001  SUnit *PredSU = Pred.getSUnit();
2002  unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
2003  assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2004  if (PredSethiUllman > SethiUllmanNumber) {
2005  SethiUllmanNumber = PredSethiUllman;
2006  Extra = 0;
2007  } else if (PredSethiUllman == SethiUllmanNumber)
2008  ++Extra;
2009  }
2010 
2011  SethiUllmanNumber += Extra;
2012  if (SethiUllmanNumber == 0)
2013  SethiUllmanNumber = 1;
2014  SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2015  WorkList.pop_back();
2016  }
2017 
2018  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2019  return SUNumbers[SU->NodeNum];
2020 }
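// Worked example (hypothetical DAG): let t3 = add t1, t2 where t1 and t2
// are independent loads. Both loads have no data preds, so their number is
// clamped to 1. For the add, the first pred sets SethiUllmanNumber = 1, the
// second ties it and bumps Extra to 1, giving 1 + 1 = 2: the classic
// Sethi-Ullman count of registers needed to evaluate the subtree without
// spilling.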
2021 
2022 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2023 /// scheduling units.
2024 void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2025  SethiUllmanNumbers.assign(SUnits->size(), 0);
2026 
2027  for (const SUnit &SU : *SUnits)
2028  CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2029 }
2030 
2031 void RegReductionPQBase::addNode(const SUnit *SU) {
2032  unsigned SUSize = SethiUllmanNumbers.size();
2033  if (SUnits->size() > SUSize)
2034  SethiUllmanNumbers.resize(SUSize*2, 0);
2035  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2036 }
2037 
2038 void RegReductionPQBase::updateNode(const SUnit *SU) {
2039  SethiUllmanNumbers[SU->NodeNum] = 0;
2040  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2041 }
2042 
2043 // Lower priority means schedule further down. For bottom-up scheduling, lower
2044 // priority SUs are scheduled before higher priority SUs.
2045 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2046  assert(SU->NodeNum < SethiUllmanNumbers.size());
2047  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2048  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2049  // CopyToReg should be close to its uses to facilitate coalescing and
2050  // avoid spilling.
2051  return 0;
2052  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2053  Opc == TargetOpcode::SUBREG_TO_REG ||
2054  Opc == TargetOpcode::INSERT_SUBREG)
2055  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2056  // close to their uses to facilitate coalescing.
2057  return 0;
2058  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2059  // If SU does not have a register use, i.e. it doesn't produce a value
2060  // that would be consumed (e.g. store), then it terminates a chain of
2061  // computation. Give it a large SethiUllman number so that it will be
2062  // scheduled right before its predecessors and doesn't lengthen
2063  // their live ranges.
2064  return 0xffff;
2065  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2066  // If SU does not have a register def, schedule it close to its uses
2067  // because it does not lengthen any live ranges.
2068  return 0;
2069 #if 1
2070  return SethiUllmanNumbers[SU->NodeNum];
2071 #else
2072  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2073  if (SU->isCallOp) {
2074  // FIXME: This assumes all of the defs are used as call operands.
2075  int NP = (int)Priority - SU->getNode()->getNumValues();
2076  return (NP > 0) ? NP : 0;
2077  }
2078  return Priority;
2079 #endif
2080 }
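// Informal summary of the ladder above: priority 0 keeps CopyToReg,
// TokenFactor, and the subreg pseudos next to their uses, and likewise
// nodes with no register defs; 0xffff marks chain-terminating nodes such as
// stores so they are scheduled right before their predecessors; all other
// nodes fall through to their Sethi-Ullman number. The #else branch is a
// disabled experiment that damped the priority of call operands.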
2081 
2082 //===----------------------------------------------------------------------===//
2083 // Register Pressure Tracking
2084 //===----------------------------------------------------------------------===//
2085 
2086 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2087 LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2088  for (const TargetRegisterClass *RC : TRI->regclasses()) {
2089  unsigned Id = RC->getID();
2090  unsigned RP = RegPressure[Id];
2091  if (!RP) continue;
2092  LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2093  << RegLimit[Id] << '\n');
2094  }
2095 }
2096 #endif
2097 
2098 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2099  if (!TLI)
2100  return false;
2101 
2102  for (const SDep &Pred : SU->Preds) {
2103  if (Pred.isCtrl())
2104  continue;
2105  SUnit *PredSU = Pred.getSUnit();
2106  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2107  // to cover the number of registers defined (they are all live).
2108  if (PredSU->NumRegDefsLeft == 0) {
2109  continue;
2110  }
2111  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2112  RegDefPos.IsValid(); RegDefPos.Advance()) {
2113  unsigned RCId, Cost;
2114  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2115 
2116  if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2117  return true;
2118  }
2119  }
2120  return false;
2121 }
2122 
2123 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2124  const SDNode *N = SU->getNode();
2125 
2126  if (!N->isMachineOpcode() || !SU->NumSuccs)
2127  return false;
2128 
2129  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2130  for (unsigned i = 0; i != NumDefs; ++i) {
2131  MVT VT = N->getSimpleValueType(i);
2132  if (!N->hasAnyUseOfValue(i))
2133  continue;
2134  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2135  if (RegPressure[RCId] >= RegLimit[RCId])
2136  return true;
2137  }
2138  return false;
2139 }
2140 
2141 // Compute the register pressure contribution of this instruction by counting
2142 // up for uses that are not live and down for defs. Only count register classes
2143 // that are already under high pressure. As a side effect, compute the number of
2144 // uses of registers that are already live.
2145 //
2146 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2147 // so could probably be factored.
2148 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2149  LiveUses = 0;
2150  int PDiff = 0;
2151  for (const SDep &Pred : SU->Preds) {
2152  if (Pred.isCtrl())
2153  continue;
2154  SUnit *PredSU = Pred.getSUnit();
2155  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2156  // to cover the number of registers defined (they are all live).
2157  if (PredSU->NumRegDefsLeft == 0) {
2158  if (PredSU->getNode()->isMachineOpcode())
2159  ++LiveUses;
2160  continue;
2161  }
2162  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2163  RegDefPos.IsValid(); RegDefPos.Advance()) {
2164  MVT VT = RegDefPos.GetValue();
2165  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2166  if (RegPressure[RCId] >= RegLimit[RCId])
2167  ++PDiff;
2168  }
2169  }
2170  const SDNode *N = SU->getNode();
2171 
2172  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2173  return PDiff;
2174 
2175  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2176  for (unsigned i = 0; i != NumDefs; ++i) {
2177  MVT VT = N->getSimpleValueType(i);
2178  if (!N->hasAnyUseOfValue(i))
2179  continue;
2180  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2181  if (RegPressure[RCId] >= RegLimit[RCId])
2182  --PDiff;
2183  }
2184  return PDiff;
2185 }
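// Informal example (hypothetical numbers): a store whose two register
// operands come from preds with NumRegDefsLeft > 0, all in a class already
// at its RegLimit, gets PDiff = +2 (scheduling it would make pressure
// worse) and LiveUses = 0. A node that only defines one used value in that
// same over-subscribed class gets PDiff = -1: scheduling it bottom-up ends
// a live range and relieves pressure.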
2186 
2187 void RegReductionPQBase::scheduledNode(SUnit *SU) {
2188  if (!TracksRegPressure)
2189  return;
2190 
2191  if (!SU->getNode())
2192  return;
2193 
2194  for (const SDep &Pred : SU->Preds) {
2195  if (Pred.isCtrl())
2196  continue;
2197  SUnit *PredSU = Pred.getSUnit();
2198  // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2199  // to cover the number of registers defined (they are all live).
2200  if (PredSU->NumRegDefsLeft == 0) {
2201  continue;
2202  }
2203  // FIXME: The ScheduleDAG currently loses information about which of a
2204  // node's values is consumed by each dependence. Consequently, if the node
2205  // defines multiple register classes, we don't know which to pressurize
2206  // here. Instead the following loop consumes the register defs in an
2207  // arbitrary order. At least it handles the common case of clustered loads
2208  // to the same class. For precise liveness, each SDep needs to indicate the
2209  // result number. But that tightly couples the ScheduleDAG with the
2210  // SelectionDAG making updates tricky. A simpler hack would be to attach a
2211  // value type or register class to SDep.
2212  //
2213  // The most important aspect of register tracking is balancing the increase
2214  // here with the reduction further below. Note that this SU may use multiple
2215  // defs in PredSU. They can't be determined here, but we've already
2216  // compensated by reducing NumRegDefsLeft in PredSU during
2217  // ScheduleDAGSDNodes::AddSchedEdges.
2218  --PredSU->NumRegDefsLeft;
2219  unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2220  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2221  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2222  if (SkipRegDefs)
2223  continue;
2224 
2225  unsigned RCId, Cost;
2226  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2227  RegPressure[RCId] += Cost;
2228  break;
2229  }
2230  }
2231 
2232  // We should have this assert, but there may be dead SDNodes that never
2233  // materialize as SUnits, so they don't appear to generate liveness.
2234  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2235  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2236  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2237  RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2238  if (SkipRegDefs > 0)
2239  continue;
2240  unsigned RCId, Cost;
2241  GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2242  if (RegPressure[RCId] < Cost) {
2243  // Register pressure tracking is imprecise. This can happen. But we try
2244  // hard not to let it happen because it likely results in poor scheduling.
2245  LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2246  << ") has too many regdefs\n");
2247  RegPressure[RCId] = 0;
2248  }
2249  else {
2250  RegPressure[RCId] -= Cost;
2251  }
2252  }
2253  LLVM_DEBUG(dumpRegPressure());
2254 }
2255 
2256 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2257  if (!TracksRegPressure)
2258  return;
2259 
2260  const SDNode *N = SU->getNode();
2261  if (!N) return;
2262 
2263  if (!N->isMachineOpcode()) {
2264  if (N->getOpcode() != ISD::CopyToReg)
2265  return;
2266  } else {
2267  unsigned Opc = N->getMachineOpcode();
2268  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2269  Opc == TargetOpcode::INSERT_SUBREG ||
2270  Opc == TargetOpcode::SUBREG_TO_REG ||
2271  Opc == TargetOpcode::REG_SEQUENCE ||
2272  Opc == TargetOpcode::IMPLICIT_DEF)
2273  return;
2274  }
2275 
2276  for (const SDep &Pred : SU->Preds) {
2277  if (Pred.isCtrl())
2278  continue;
2279  SUnit *PredSU = Pred.getSUnit();
2280  // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2281  // counts data deps.
2282  if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2283  continue;
2284  const SDNode *PN = PredSU->getNode();
2285  if (!PN->isMachineOpcode()) {
2286  if (PN->getOpcode() == ISD::CopyFromReg) {
2287  MVT VT = PN->getSimpleValueType(0);
2288  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2289  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2290  }
2291  continue;
2292  }
2293  unsigned POpc = PN->getMachineOpcode();
2294  if (POpc == TargetOpcode::IMPLICIT_DEF)
2295  continue;
2296  if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2297  POpc == TargetOpcode::INSERT_SUBREG ||
2298  POpc == TargetOpcode::SUBREG_TO_REG) {
2299  MVT VT = PN->getSimpleValueType(0);
2300  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2301  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2302  continue;
2303  }
2304  unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2305  for (unsigned i = 0; i != NumDefs; ++i) {
2306  MVT VT = PN->getSimpleValueType(i);
2307  if (!PN->hasAnyUseOfValue(i))
2308  continue;
2309  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2310  if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2311  // Register pressure tracking is imprecise. This can happen.
2312  RegPressure[RCId] = 0;
2313  else
2314  RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2315  }
2316  }
2317 
2318  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2319  // may transfer data dependencies to CopyToReg.
2320  if (SU->NumSuccs && N->isMachineOpcode()) {
2321  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2322  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2323  MVT VT = N->getSimpleValueType(i);
2324  if (VT == MVT::Glue || VT == MVT::Other)
2325  continue;
2326  if (!N->hasAnyUseOfValue(i))
2327  continue;
2328  unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2329  RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2330  }
2331  }
2332 
2333  LLVM_DEBUG(dumpRegPressure());
2334 }
2335 
2336 //===----------------------------------------------------------------------===//
2337 // Dynamic Node Priority for Register Pressure Reduction
2338 //===----------------------------------------------------------------------===//
2339 
2340 /// closestSucc - Returns the scheduled cycle of the successor which is
2341 /// closest to the current cycle.
2342 static unsigned closestSucc(const SUnit *SU) {
2343  unsigned MaxHeight = 0;
2344  for (const SDep &Succ : SU->Succs) {
2345  if (Succ.isCtrl()) continue; // ignore chain succs
2346  unsigned Height = Succ.getSUnit()->getHeight();
2347  // If there are a bunch of CopyToRegs stacked up, they should be considered
2348  // to be at the same position.
2349  if (Succ.getSUnit()->getNode() &&
2350  Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2351  Height = closestSucc(Succ.getSUnit())+1;
2352  if (Height > MaxHeight)
2353  MaxHeight = Height;
2354  }
2355  return MaxHeight;
2356 }
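// Informal example: for data successors already scheduled at heights 3 and
// 7, closestSucc returns 7, the one nearest the current bottom-up cycle. A
// CopyToReg successor is given its own closest successor's height plus one,
// so a stack of copies counts as a single position rather than pushing the
// real consumer farther away.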
2357 
2358 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2359 /// for scratch registers, i.e. the number of data dependencies.
2360 static unsigned calcMaxScratches(const SUnit *SU) {
2361  unsigned Scratches = 0;
2362  for (const SDep &Pred : SU->Preds) {
2363  if (Pred.isCtrl()) continue; // ignore chain preds
2364  Scratches++;
2365  }
2366  return Scratches;
2367 }
2368 
2369 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2370 /// CopyFromReg from a virtual register.
2371 static bool hasOnlyLiveInOpers(const SUnit *SU) {
2372  bool RetVal = false;
2373  for (const SDep &Pred : SU->Preds) {
2374  if (Pred.isCtrl()) continue;
2375  const SUnit *PredSU = Pred.getSUnit();
2376  if (PredSU->getNode() &&
2377  PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2378  Register Reg =
2379  cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2380  if (Register::isVirtualRegister(Reg)) {
2381  RetVal = true;
2382  continue;
2383  }
2384  }
2385  return false;
2386  }
2387  return RetVal;
2388 }
2389 
2390 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2391 /// CopyToReg to a virtual register. This SU def is probably a liveout and
2392 /// it has no other use. It should be scheduled closer to the terminator.
2393 static bool hasOnlyLiveOutUses(const SUnit *SU) {
2394  bool RetVal = false;
2395  for (const SDep &Succ : SU->Succs) {
2396  if (Succ.isCtrl()) continue;
2397  const SUnit *SuccSU = Succ.getSUnit();
2398  if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2399  Register Reg =
2400  cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2401  if (Register::isVirtualRegister(Reg)) {
2402  RetVal = true;
2403  continue;
2404  }
2405  }
2406  return false;
2407  }
2408  return RetVal;
2409 }
2410 
2411 // Set isVRegCycle for a node with only live in opers and live out uses. Also
2412 // set isVRegCycle for its CopyFromReg operands.
2413 //
2414 // This is only relevant for single-block loops, in which case the VRegCycle
2415 // node is likely an induction variable in which the operand and target virtual
2416 // registers should be coalesced (e.g. pre/post increment values). Setting the
2417 // isVRegCycle flag helps the scheduler prioritize other uses of the same
2418 // CopyFromReg so that this node becomes the virtual register "kill". This
2419 // avoids interference between the values live in and out of the block and
2420 // eliminates a copy inside the loop.
2421 static void initVRegCycle(SUnit *SU) {
2422  if (DisableSchedVRegCycle)
2423  return;
2424 
2425  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2426  return;
2427 
2428  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2429 
2430  SU->isVRegCycle = true;
2431 
2432  for (const SDep &Pred : SU->Preds) {
2433  if (Pred.isCtrl()) continue;
2434  Pred.getSUnit()->isVRegCycle = true;
2435  }
2436 }
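// Informal example: in a single-block loop containing
//   %iv.next = add %iv, 1
// the add's only data operand is CopyFromReg(%iv) and its only data use
// feeds CopyToReg(%iv.next), so the add and its CopyFromReg operands are
// tagged isVRegCycle. Prioritizing the other users of %iv lets the add
// become the kill of %iv, helping the coalescer merge %iv with %iv.next and
// removing a copy inside the loop.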
2437 
2438 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2439 // CopyFromReg operands. We should no longer penalize other uses of this VReg.
2440 static void resetVRegCycle(SUnit *SU) {
2441  if (!SU->isVRegCycle)
2442  return;
2443 
2444  for (const SDep &Pred : SU->Preds) {
2445  if (Pred.isCtrl()) continue; // ignore chain preds
2446  SUnit *PredSU = Pred.getSUnit();
2447  if (PredSU->isVRegCycle) {
2448  assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2449  "VRegCycle def must be CopyFromReg");
2450  Pred.getSUnit()->isVRegCycle = false;
2451  }
2452  }
2453 }
2454 
2455 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2456 // means a node that defines the VRegCycle has not been scheduled yet.
2457 static bool hasVRegCycleUse(const SUnit *SU) {
2458  // If this SU also defines the VReg, don't hoist it as a "use".
2459  if (SU->isVRegCycle)
2460  return false;
2461 
2462  for (const SDep &Pred : SU->Preds) {
2463  if (Pred.isCtrl()) continue; // ignore chain preds
2464  if (Pred.getSUnit()->isVRegCycle &&
2465  Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2466  LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2467  return true;
2468  }
2469  }
2470  return false;
2471 }
2472 
2473 // Check for either a dependence (latency) or resource (hazard) stall.
2474 //
2475 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2476 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2477  if ((int)SPQ->getCurCycle() < Height) return true;
2478  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2479  != ScheduleHazardRecognizer::NoHazard)
2480  return true;
2481  return false;
2482 }
2483 
2484 // Return -1 if left has higher priority, 1 if right has higher priority.
2485 // Return 0 if latency-based priority is equivalent.
2486 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2487  RegReductionPQBase *SPQ) {
2488  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2489  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2490  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2491  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2492  int LHeight = (int)left->getHeight() + LPenalty;
2493  int RHeight = (int)right->getHeight() + RPenalty;
2494 
2495  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2496  BUHasStall(left, LHeight, SPQ);
2497  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2498  BUHasStall(right, RHeight, SPQ);
2499 
2500  // If scheduling one of the nodes will cause a pipeline stall, delay it.
2501  // If scheduling both of the nodes will cause pipeline stalls, sort them
2502  // according to their height.
2503  if (LStall) {
2504  if (!RStall)
2505  return 1;
2506  if (LHeight != RHeight)
2507  return LHeight > RHeight ? 1 : -1;
2508  } else if (RStall)
2509  return -1;
2510 
2511  // If either node is scheduling for latency, sort them by height/depth
2512  // and latency.
2513  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2514  right->SchedulingPref == Sched::ILP)) {
2515  // If neither instruction stalls (!LStall && !RStall) and the hazard
2516  // recognizer is enabled, grouping instructions by cycle, then height is
2517  // already covered so only depth matters. We also reach this point if both
2518  // stall but have the same height.
2519  if (!SPQ->getHazardRec()->isEnabled()) {
2520  if (LHeight != RHeight)
2521  return LHeight > RHeight ? 1 : -1;
2522  }
2523  int LDepth = left->getDepth() - LPenalty;
2524  int RDepth = right->getDepth() - RPenalty;
2525  if (LDepth != RDepth) {
2526  LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2527  << ") depth " << LDepth << " vs SU (" << right->NodeNum
2528  << ") depth " << RDepth << "\n");
2529  return LDepth < RDepth ? 1 : -1;
2530  }
2531  if (left->Latency != right->Latency)
2532  return left->Latency > right->Latency ? 1 : -1;
2533  }
2534  return 0;
2535 }
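// Informal example (hypothetical numbers): at CurCycle = 4, a left node of
// height 6 stalls (4 < 6) while a right node of height 3 with no hazard
// does not, so the function returns 1 and right is scheduled first. If both
// stall, the taller node -- the one that would idle the pipeline longer --
// is deferred by the height comparison.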
2536 
2537 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2538  // Schedule physical register definitions close to their use. This is
2539  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2540  // long as shortening physreg live ranges is generally good, we can defer
2541  // creating a subtarget hook.
2542  if (!DisableSchedPhysRegJoin) {
2543  bool LHasPhysReg = left->hasPhysRegDefs;
2544  bool RHasPhysReg = right->hasPhysRegDefs;
2545  if (LHasPhysReg != RHasPhysReg) {
2546  #ifndef NDEBUG
2547  static const char *const PhysRegMsg[] = { " has no physreg",
2548  " defines a physreg" };
2549  #endif
2550  LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2551  << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2552  << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2553  return LHasPhysReg < RHasPhysReg;
2554  }
2555  }
2556 
2557  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2558  unsigned LPriority = SPQ->getNodePriority(left);
2559  unsigned RPriority = SPQ->getNodePriority(right);
2560 
2561  // Be really careful about hoisting call operands above previous calls.
2562  // Only allow it if it would reduce register pressure.
2563  if (left->isCall && right->isCallOp) {
2564  unsigned RNumVals = right->getNode()->getNumValues();
2565  RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2566  }
2567  if (right->isCall && left->isCallOp) {
2568  unsigned LNumVals = left->getNode()->getNumValues();
2569  LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2570  }
2571 
2572  if (LPriority != RPriority)
2573  return LPriority > RPriority;
2574 
2575  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2576  // the same, then keep source order.
2577  if (left->isCall || right->isCall) {
2578  unsigned LOrder = SPQ->getNodeOrdering(left);
2579  unsigned ROrder = SPQ->getNodeOrdering(right);
2580 
2581  // Prefer an ordering where the lower the non-zero order number, the higher
2582  // the preference.
2583  if ((LOrder || ROrder) && LOrder != ROrder)
2584  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2585  }
2586 
2587  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2588  // e.g.
2589  // t1 = op t2, c1
2590  // t3 = op t4, c2
2591  //
2592  // and the following instructions are both ready.
2593  // t2 = op c3
2594  // t4 = op c4
2595  //
2596  // Then schedule t2 = op first.
2597  // i.e.
2598  // t4 = op c4
2599  // t2 = op c3
2600  // t1 = op t2, c1
2601  // t3 = op t4, c2
2602  //
2603  // This creates more short live intervals.
2604  unsigned LDist = closestSucc(left);
2605  unsigned RDist = closestSucc(right);
2606  if (LDist != RDist)
2607  return LDist < RDist;
2608 
2609  // How many registers become live when the node is scheduled.
2610  unsigned LScratch = calcMaxScratches(left);
2611  unsigned RScratch = calcMaxScratches(right);
2612  if (LScratch != RScratch)
2613  return LScratch > RScratch;
2614 
2615  // Comparing latency against a call makes little sense unless the node
2616  // is register pressure-neutral.
2617  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2618  return (left->NodeQueueId > right->NodeQueueId);
2619 
2620  // Do not compare latencies when one or both of the nodes are calls.
2621  if (!DisableSchedCycles &&
2622  !(left->isCall || right->isCall)) {
2623  int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2624  if (result != 0)
2625  return result > 0;
2626  }
2627  else {
2628  if (left->getHeight() != right->getHeight())
2629  return left->getHeight() > right->getHeight();
2630 
2631  if (left->getDepth() != right->getDepth())
2632  return left->getDepth() < right->getDepth();
2633  }
2634 
2635  assert(left->NodeQueueId && right->NodeQueueId &&
2636  "NodeQueueId cannot be zero");
2637  return (left->NodeQueueId > right->NodeQueueId);
2638 }
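// Informal recap of the tie-break ladder above: physreg defs are pulled
// toward their uses; then Sethi-Ullman priority (damped for call operands);
// then source order when calls are involved; then distance to the closest
// successor; then the scratch-register estimate; then latency (or raw
// height/depth when cycle-level comparison is unavailable); and finally
// NodeQueueId, which favors the node that became ready first.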
2639 
2640 // Bottom up
2641 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2642  if (int res = checkSpecialNodes(left, right))
2643  return res > 0;
2644 
2645  return BURRSort(left, right, SPQ);
2646 }
2647 
2648 // Source order, otherwise bottom up.
2649 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2650  if (int res = checkSpecialNodes(left, right))
2651  return res > 0;
2652 
2653  unsigned LOrder = SPQ->getNodeOrdering(left);
2654  unsigned ROrder = SPQ->getNodeOrdering(right);
2655 
2656  // Prefer an ordering where the lower the non-zero order number, the higher
2657  // the preference.
2658  if ((LOrder || ROrder) && LOrder != ROrder)
2659  return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2660 
2661  return BURRSort(left, right, SPQ);
2662 }
2663 
2664 // If the time between now and when the instruction will be ready can cover
2665 // the spill code, then avoid adding it to the ready queue. This gives long
2666 // stalls highest priority and allows hoisting across calls. It should also
2667 // speed up processing the available queue.
2668 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2669  static const unsigned ReadyDelay = 3;
2670 
2671  if (SPQ->MayReduceRegPressure(SU)) return true;
2672 
2673  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2674 
2675  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2676  != ScheduleHazardRecognizer::NoHazard)
2677  return false;
2678 
2679  return true;
2680 }
2681 
2682 // Return true if right should be scheduled with higher priority than left.
2683 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2684  if (int res = checkSpecialNodes(left, right))
2685  return res > 0;
2686 
2687  if (left->isCall || right->isCall)
2688  // No way to compute latency of calls.
2689  return BURRSort(left, right, SPQ);
2690 
2691  bool LHigh = SPQ->HighRegPressure(left);
2692  bool RHigh = SPQ->HighRegPressure(right);
2693  // Avoid causing spills. If register pressure is high, schedule for
2694  // register pressure reduction.
2695  if (LHigh && !RHigh) {
2696  LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2697  << right->NodeNum << ")\n");
2698  return true;
2699  }
2700  else if (!LHigh && RHigh) {
2701  LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2702  << left->NodeNum << ")\n");
2703  return false;
2704  }
2705  if (!LHigh && !RHigh) {
2706  int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2707  if (result != 0)
2708  return result > 0;
2709  }
2710  return BURRSort(left, right, SPQ);
2711 }
2712 
2713 // Schedule as many instructions in each cycle as possible. So don't make an
2714 // instruction available unless it is ready in the current cycle.
2715 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2716  if (SU->getHeight() > CurCycle) return false;
2717 
2718  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2719  != ScheduleHazardRecognizer::NoHazard)
2720  return false;
2721 
2722  return true;
2723 }
2724 
2725 static bool canEnableCoalescing(SUnit *SU) {
2726  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2727  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2728  // CopyToReg should be close to its uses to facilitate coalescing and
2729  // avoid spilling.
2730  return true;
2731 
2732  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2733  Opc == TargetOpcode::SUBREG_TO_REG ||
2734  Opc == TargetOpcode::INSERT_SUBREG)
2735  // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2736  // close to their uses to facilitate coalescing.
2737  return true;
2738 
2739  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2740  // If SU does not have a register def, schedule it close to its uses
2741  // because it does not lengthen any live ranges.
2742  return true;
2743 
2744  return false;
2745 }
2746 
2747 // list-ilp is currently an experimental scheduler that allows various
2748 // heuristics to be enabled prior to the normal register reduction logic.
2749 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2750  if (int res = checkSpecialNodes(left, right))
2751  return res > 0;
2752 
2753  if (left->isCall || right->isCall)
2754  // No way to compute latency of calls.
2755  return BURRSort(left, right, SPQ);
2756 
2757  unsigned LLiveUses = 0, RLiveUses = 0;
2758  int LPDiff = 0, RPDiff = 0;
2759  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2760  LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2761  RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2762  }
2763  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2764  LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2765  << "): " << LPDiff << " != SU(" << right->NodeNum
2766  << "): " << RPDiff << "\n");
2767  return LPDiff > RPDiff;
2768  }
2769 
2770  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2771  bool LReduce = canEnableCoalescing(left);
2772  bool RReduce = canEnableCoalescing(right);
2773  if (LReduce && !RReduce) return false;
2774  if (RReduce && !LReduce) return true;
2775  }
2776 
2777  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2778  LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2779  << " != SU(" << right->NodeNum << "): " << RLiveUses
2780  << "\n");
2781  return LLiveUses < RLiveUses;
2782  }
2783 
2784  if (!DisableSchedStalls) {
2785  bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2786  bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2787  if (LStall != RStall)
2788  return left->getHeight() > right->getHeight();
2789  }
2790 
2791  if (!DisableSchedCriticalPath) {
2792  int spread = (int)left->getDepth() - (int)right->getDepth();
2793  if (std::abs(spread) > MaxReorderWindow) {
2794  LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2795  << left->getDepth() << " != SU(" << right->NodeNum
2796  << "): " << right->getDepth() << "\n");
2797  return left->getDepth() < right->getDepth();
2798  }
2799  }
2800 
2801  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2802  int spread = (int)left->getHeight() - (int)right->getHeight();
2803  if (std::abs(spread) > MaxReorderWindow)
2804  return left->getHeight() > right->getHeight();
2805  }
2806 
2807  return BURRSort(left, right, SPQ);
2808 }
2809 
2810 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2811  SUnits = &sunits;
2812  // Add pseudo dependency edges for two-address nodes.
2813  if (!Disable2AddrHack)
2814  AddPseudoTwoAddrDeps();
2815  // Reroute edges to nodes with multiple uses.
2816  if (!TracksRegPressure && !SrcOrder)
2817  PrescheduleNodesWithMultipleUses();
2818  // Calculate node priorities.
2819  CalculateSethiUllmanNumbers();
2820 
2821  // For single block loops, mark nodes that look like canonical IV increments.
2822  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2823  for (SUnit &SU : sunits)
2824  initVRegCycle(&SU);
2825 }
2826 
2827 //===----------------------------------------------------------------------===//
2828 // Preschedule for Register Pressure
2829 //===----------------------------------------------------------------------===//
2830 
2831 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2832  if (SU->isTwoAddress) {
2833  unsigned Opc = SU->getNode()->getMachineOpcode();
2834  const MCInstrDesc &MCID = TII->get(Opc);
2835  unsigned NumRes = MCID.getNumDefs();
2836  unsigned NumOps = MCID.getNumOperands() - NumRes;
2837  for (unsigned i = 0; i != NumOps; ++i) {
2838  if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2839  SDNode *DU = SU->getNode()->getOperand(i).getNode();
2840  if (DU->getNodeId() != -1 &&
2841  Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2842  return true;
2843  }
2844  }
2845  }
2846  return false;
2847 }
2848 
2849 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2850 /// successor's explicit physregs whose definition can reach DepSU.
2851 /// i.e. DepSU should not be scheduled above SU.
2852 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2853  ScheduleDAGRRList *scheduleDAG,
2854  const TargetInstrInfo *TII,
2855  const TargetRegisterInfo *TRI) {
2856  const MCPhysReg *ImpDefs
2857  = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2858  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2859  if(!ImpDefs && !RegMask)
2860  return false;
2861 
2862  for (const SDep &Succ : SU->Succs) {
2863  SUnit *SuccSU = Succ.getSUnit();
2864  for (const SDep &SuccPred : SuccSU->Preds) {
2865  if (!SuccPred.isAssignedRegDep())
2866  continue;
2867 
2868  if (RegMask &&
2869  MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2870  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2871  return true;
2872 
2873  if (ImpDefs)
2874  for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2875  // Return true if SU clobbers this physical register use and the
2876  // definition of the register reaches from DepSU. IsReachable queries
2877  // a topological forward sort of the DAG (following the successors).
2878  if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
2879  scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2880  return true;
2881  }
2882  }
2883  return false;
2884 }
2885 
2886 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2887 /// physical register defs.
2888 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2889  const TargetInstrInfo *TII,
2890  const TargetRegisterInfo *TRI) {
2891  SDNode *N = SuccSU->getNode();
2892  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2893  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2894  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2895  for (const SDNode *SUNode = SU->getNode(); SUNode;
2896  SUNode = SUNode->getGluedNode()) {
2897  if (!SUNode->isMachineOpcode())
2898  continue;
2899  const MCPhysReg *SUImpDefs =
2900  TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2901  const uint32_t *SURegMask = getNodeRegMask(SUNode);
2902  if (!SUImpDefs && !SURegMask)
2903  continue;
2904  for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2905  MVT VT = N->getSimpleValueType(i);
2906  if (VT == MVT::Glue || VT == MVT::Other)
2907  continue;
2908  if (!N->hasAnyUseOfValue(i))
2909  continue;
2910  MCPhysReg Reg = ImpDefs[i - NumDefs];
2911  if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2912  return true;
2913  if (!SUImpDefs)
2914  continue;
2915  for (;*SUImpDefs; ++SUImpDefs) {
2916  MCPhysReg SUReg = *SUImpDefs;
2917  if (TRI->regsOverlap(Reg, SUReg))
2918  return true;
2919  }
2920  }
2921  }
2922  return false;
2923 }
2924 
2925 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2926 /// are not handled well by the general register pressure reduction
2927 /// heuristics. When presented with code like this:
2928 ///
2929 ///                N
2930 ///              / |
2931 ///             /  |
2932 ///            U  store
2933 ///            |
2934 ///           ...
2935 ///
2936 /// the heuristics tend to push the store up, but since the
2937 /// operand of the store has another use (U), this would increase
2938 /// the length of that other use (the U->N edge).
2939 ///
2940 /// This function transforms code like the above to route U's
2941 /// dependence through the store when possible, like this:
2942 ///
2943 ///                N
2944 ///                ||
2945 ///                ||
2946 ///               store
2947 ///                 |
2948 ///                 U
2949 ///                 |
2950 ///                ...
2951 ///
2952 /// This results in the store being scheduled immediately
2953 /// after N, which shortens the U->N live range, reducing
2954 /// register pressure.
2955 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2956  // Visit all the nodes in topological order, working top-down.
2957  for (SUnit &SU : *SUnits) {
2958  // For now, only look at nodes with no data successors, such as stores.
2959  // These are especially important, due to the heuristics in
2960  // getNodePriority for nodes with no data successors.
2961  if (SU.NumSuccs != 0)
2962  continue;
2963  // For now, only look at nodes with exactly one data predecessor.
2964  if (SU.NumPreds != 1)
2965  continue;
2966  // Avoid prescheduling copies to virtual registers, which don't behave
2967  // like other nodes from the perspective of scheduling heuristics.
2968  if (SDNode *N = SU.getNode())
2969  if (N->getOpcode() == ISD::CopyToReg &&
2970  Register::isVirtualRegister(
2971  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2972  continue;
2973 
2974  SDNode *PredFrameSetup = nullptr;
2975  for (const SDep &Pred : SU.Preds)
2976  if (Pred.isCtrl() && Pred.getSUnit()) {
2977  // Find a predecessor that is not a data dependence.
2978  SDNode *PredND = Pred.getSUnit()->getNode();
2979 
2980  // If PredND is a FrameSetup, we should not pre-schedule the node;
2981  // otherwise, in bottom-up scheduling, ADJCALLSTACKDOWN and
2982  // ADJCALLSTACKUP may hold the CallResource for too long and prevent
2983  // other calls from being scheduled. If there is no other available
2984  // node to schedule, the scheduler will try to rename the register by
2985  // creating a copy to avoid the conflict, which will fail because
2986  // CallResource is not a real physical register.
2987  if (PredND && PredND->isMachineOpcode() &&
2988  (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2989  PredFrameSetup = PredND;
2990  break;
2991  }
2992  }
2993  // Skip the node if it has a FrameSetup parent.
2994  if (PredFrameSetup != nullptr)
2995  continue;
2996 
2997  // Locate the single data predecessor.
2998  SUnit *PredSU = nullptr;
2999  for (const SDep &Pred : SU.Preds)
3000  if (!Pred.isCtrl()) {
3001  PredSU = Pred.getSUnit();
3002  break;
3003  }
3004  assert(PredSU);
3005 
3006  // Don't rewrite edges that carry physregs, because that requires additional
3007  // support infrastructure.
3008  if (PredSU->hasPhysRegDefs)
3009  continue;
3010  // Short-circuit the case where SU is PredSU's only data successor.
3011  if (PredSU->NumSuccs == 1)
3012  continue;
3013  // Avoid prescheduling to copies from virtual registers, which don't behave
3014  // like other nodes from the perspective of scheduling heuristics.
3015  if (SDNode *N = SU.getNode())
3016  if (N->getOpcode() == ISD::CopyFromReg &&
3017  Register::isVirtualRegister(
3018  cast<RegisterSDNode>(N->getOperand(1))->getReg()))
3019  continue;
3020 
3021  // Perform checks on the successors of PredSU.
3022  for (const SDep &PredSucc : PredSU->Succs) {
3023  SUnit *PredSuccSU = PredSucc.getSUnit();
3024  if (PredSuccSU == &SU) continue;
3025  // If PredSU has another successor with no data successors, for
3026  // now don't attempt to choose either over the other.
3027  if (PredSuccSU->NumSuccs == 0)
3028  goto outer_loop_continue;
3029  // Don't break physical register dependencies.
3030  if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3031  if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3032  goto outer_loop_continue;
3033  // Don't introduce graph cycles.
3034  if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3035  goto outer_loop_continue;
3036  }
3037 
3038  // Ok, the transformation is safe and the heuristics suggest it is
3039  // profitable. Update the graph.
3040  LLVM_DEBUG(
3041  dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3042  << PredSU->NodeNum
3043  << " to guide scheduling in the presence of multiple uses\n");
3044  for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3045  SDep Edge = PredSU->Succs[i];
3046  assert(!Edge.isAssignedRegDep());
3047  SUnit *SuccSU = Edge.getSUnit();
3048  if (SuccSU != &SU) {
3049  Edge.setSUnit(PredSU);
3050  scheduleDAG->RemovePred(SuccSU, Edge);
3051  scheduleDAG->AddPredQueued(&SU, Edge);
3052  Edge.setSUnit(&SU);
3053  scheduleDAG->AddPredQueued(SuccSU, Edge);
3054  --i;
3055  }
3056  }
3057  outer_loop_continue:;
3058  }
3059 }
3060 
3061 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3062 /// it as a def&use operand, add a pseudo control edge from it to the other
3063 /// node (if it won't create a cycle) so the two-address one will be scheduled
3064 /// first (lower in the schedule). If both nodes are two-address, favor the
3065 /// one that has a CopyToReg use (more likely to be a loop induction update).
3066 /// If both are two-address but only one is commutable, favor the one that is
3067 /// not commutable.
3068 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3069  for (SUnit &SU : *SUnits) {
3070  if (!SU.isTwoAddress)
3071  continue;
3072 
3073  SDNode *Node = SU.getNode();
3074  if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3075  continue;
3076 
3077  bool isLiveOut = hasOnlyLiveOutUses(&SU);
3078  unsigned Opc = Node->getMachineOpcode();
3079  const MCInstrDesc &MCID = TII->get(Opc);
3080  unsigned NumRes = MCID.getNumDefs();
3081  unsigned NumOps = MCID.getNumOperands() - NumRes;
3082  for (unsigned j = 0; j != NumOps; ++j) {
3083  if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3084  continue;
3085  SDNode *DU = SU.getNode()->getOperand(j).getNode();
3086  if (DU->getNodeId() == -1)
3087  continue;
3088  const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3089  if (!DUSU)
3090  continue;
3091  for (const SDep &Succ : DUSU->Succs) {
3092  if (Succ.isCtrl())
3093  continue;
3094  SUnit *SuccSU = Succ.getSUnit();
3095  if (SuccSU == &SU)
3096  continue;
3097  // Be conservative. Ignore if nodes aren't at roughly the same
3098  // depth and height.
3099  if (SuccSU->getHeight() < SU.getHeight() &&
3100  (SU.getHeight() - SuccSU->getHeight()) > 1)
3101  continue;
3102  // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3103  // constrains whatever is using the copy, instead of the copy
3104  // itself. In the case that the copy is coalesced, this
3105  // preserves the intent of the pseudo two-address heuristics.
3106  while (SuccSU->Succs.size() == 1 &&
3107  SuccSU->getNode()->isMachineOpcode() &&
3108  SuccSU->getNode()->getMachineOpcode() ==
3109  TargetOpcode::COPY_TO_REGCLASS)
3110  SuccSU = SuccSU->Succs.front().getSUnit();
3111  // Don't constrain non-instruction nodes.
3112  if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3113  continue;
3114  // Don't constrain nodes with physical register defs if the
3115  // predecessor can clobber them.
3116  if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3117  if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3118  continue;
3119  }
3120  // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3121  // these may be coalesced away. We want them close to their uses.
3122  unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3123  if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3124  SuccOpc == TargetOpcode::INSERT_SUBREG ||
3125  SuccOpc == TargetOpcode::SUBREG_TO_REG)
3126  continue;
3127  if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3128  (!canClobber(SuccSU, DUSU) ||
3129  (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3130  (!SU.isCommutable && SuccSU->isCommutable)) &&
3131  !scheduleDAG->IsReachable(SuccSU, &SU)) {
3132  LLVM_DEBUG(dbgs()
3133  << " Adding a pseudo-two-addr edge from SU #"
3134  << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3135  scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3136  }
3137  }
3138  }
3139  }
3140 }
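// A concrete (hypothetical) instance of the heuristic above: suppose value t0
// has two users, one of which is tied to its result register,
//
//   t1 = add_2addr t0, tA   // result operand TIED_TO t0
//   t2 = cmp t0, tB         // ordinary use of t0
//
// The artificial edge cmp -> add_2addr makes the bottom-up scheduler place the
// add below the cmp, so t0 dies at the add and its register can be reused for
// t1 without inserting a copy. add_2addr/cmp are stand-ins for any machine
// opcodes whose MCInstrDesc does or does not carry a TIED_TO constraint.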
3141 
3142 //===----------------------------------------------------------------------===//
3143 //                         Public Constructor Functions
3144 //===----------------------------------------------------------------------===//
3145 
3146 ScheduleDAGSDNodes *
3147 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3148  CodeGenOpt::Level OptLevel) {
3149  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3150  const TargetInstrInfo *TII = STI.getInstrInfo();
3151  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3152 
3153  BURegReductionPriorityQueue *PQ =
3154  new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3155  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3156  PQ->setScheduleDAG(SD);
3157  return SD;
3158 }
3159 
3160 ScheduleDAGSDNodes *
3161 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3162  CodeGenOpt::Level OptLevel) {
3163  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3164  const TargetInstrInfo *TII = STI.getInstrInfo();
3165  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3166 
3167  SrcRegReductionPriorityQueue *PQ =
3168  new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3169  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3170  PQ->setScheduleDAG(SD);
3171  return SD;
3172 }
3173 
3174 ScheduleDAGSDNodes *
3175 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3176  CodeGenOpt::Level OptLevel) {
3177  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3178  const TargetInstrInfo *TII = STI.getInstrInfo();
3179  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3180  const TargetLowering *TLI = IS->TLI;
3181 
3182  HybridBURRPriorityQueue *PQ =
3183  new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3184 
3185  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3186  PQ->setScheduleDAG(SD);
3187  return SD;
3188 }
3189 
3190 ScheduleDAGSDNodes *
3191 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3192  CodeGenOpt::Level OptLevel) {
3193  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3194  const TargetInstrInfo *TII = STI.getInstrInfo();
3195  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3196  const TargetLowering *TLI = IS->TLI;
3197 
3198  ILPBURRPriorityQueue *PQ =
3199  new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3200  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3201  PQ->setScheduleDAG(SD);
3202  return SD;
3203 }
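// How these factories get used (an illustrative sketch, not code from this
// file): SelectionDAGISel picks a scheduler either from the -pre-RA-sched
// command-line option, which matches the RegisterScheduler names declared at
// the top of this file, e.g.
//
//   llc -pre-RA-sched=list-ilp test.ll
//
// or from the target's scheduling preference, which a backend would set in
// its TargetLowering constructor (MyTargetLowering is hypothetical; Sched::*
// comes from llvm/CodeGen/TargetLowering.h):
//
//   MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
//       : TargetLowering(TM) {
//     setSchedulingPreference(Sched::Hybrid); // selects the list-hybrid path
//   }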