1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
38#include "llvm/Config/llvm-config.h"
39#include "llvm/IR/InlineAsm.h"
40#include "llvm/MC/MCInstrDesc.h"
46#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
71 "Bottom-up register reduction list scheduling",
73
76 "Similar to list-burr but schedules in source "
77 "order when possible",
79
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
85
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 "sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle whan no target itinerary exists."));
131
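// The scheduler variants registered above are selected via llc's
// -pre-RA-sched=<name> option (list-burr, source, list-hybrid, list-ilp), and
// the hidden cl::opt flags above tune the list-ilp / list-hybrid heuristics.
// A purely illustrative invocation (flag values chosen for the example only):
//
//   llc -pre-RA-sched=list-ilp -max-sched-reorder=4 foo.ll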
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands become available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount;
164
165 /// LiveRegDefs - A set of physical registers and their definitions
166 /// that are "live". These nodes must be scheduled before any other nodes
167 /// that modify the registers can be scheduled.
168 unsigned NumLiveRegs;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
176 using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
182 ScheduleDAGTopologicalSort Topo;
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOpt::Level OptLevel)
192 : ScheduleDAGSDNodes(mf),
193 NeedLatency(needlatency), AvailableQueue(availqueue),
194 Topo(SUnits, nullptr) {
195 const TargetSubtargetInfo &STI = mf.getSubtarget();
196 if (DisableSchedCycles || !NeedLatency)
197 HazardRec = new ScheduleHazardRecognizer();
198 else
199 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
200 }
201
202 ~ScheduleDAGRRList() override {
203 delete HazardRec;
204 delete AvailableQueue;
205 }
206
207 void Schedule() override;
208
209 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
210
211 /// IsReachable - Checks if SU is reachable from TargetSU.
212 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
213 return Topo.IsReachable(SU, TargetSU);
214 }
215
216 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
217 /// create a cycle.
218 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
219 return Topo.WillCreateCycle(SU, TargetSU);
220 }
221
222 /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
223 /// This returns true if this is a new predecessor.
224 /// Does *NOT* update the topological ordering! It just queues an update.
225 void AddPredQueued(SUnit *SU, const SDep &D) {
226 Topo.AddPredQueued(SU, D.getSUnit());
227 SU->addPred(D);
228 }
229
230 /// AddPred - adds a predecessor edge to SUnit SU.
231 /// This returns true if this is a new predecessor.
232 /// Updates the topological ordering if required.
233 void AddPred(SUnit *SU, const SDep &D) {
234 Topo.AddPred(SU, D.getSUnit());
235 SU->addPred(D);
236 }
237
238 /// RemovePred - removes a predecessor edge from SUnit SU.
239 /// This returns true if an edge was removed.
240 /// Updates the topological ordering if required.
241 void RemovePred(SUnit *SU, const SDep &D) {
242 Topo.RemovePred(SU, D.getSUnit());
243 SU->removePred(D);
244 }
245
246private:
247 bool isReady(SUnit *SU) {
248 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
249 AvailableQueue->isReady(SU);
250 }
251
252 void ReleasePred(SUnit *SU, const SDep *PredEdge);
253 void ReleasePredecessors(SUnit *SU);
254 void ReleasePending();
255 void AdvanceToCycle(unsigned NextCycle);
256 void AdvancePastStalls(SUnit *SU);
257 void EmitNode(SUnit *SU);
258 void ScheduleNodeBottomUp(SUnit*);
259 void CapturePred(SDep *PredEdge);
260 void UnscheduleNodeBottomUp(SUnit*);
261 void RestoreHazardCheckerBottomUp();
262 void BacktrackBottomUp(SUnit*, SUnit*);
263 SUnit *TryUnfoldSU(SUnit *);
264 SUnit *CopyAndMoveSuccessors(SUnit*);
265 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
266 const TargetRegisterClass*,
267 const TargetRegisterClass*,
268 SmallVectorImpl<SUnit*>&);
269 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
270
271 void releaseInterferences(unsigned Reg = 0);
272
273 SUnit *PickNodeToScheduleBottomUp();
274 void ListScheduleBottomUp();
275
276 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
277 SUnit *CreateNewSUnit(SDNode *N) {
278 unsigned NumSUnits = SUnits.size();
279 SUnit *NewNode = newSUnit(N);
280 // Update the topological ordering.
281 if (NewNode->NodeNum >= NumSUnits)
282 Topo.AddSUnitWithoutPredecessors(NewNode);
283 return NewNode;
284 }
285
286 /// CreateClone - Creates a new SUnit from an existing one.
287 SUnit *CreateClone(SUnit *N) {
288 unsigned NumSUnits = SUnits.size();
289 SUnit *NewNode = Clone(N);
290 // Update the topological ordering.
291 if (NewNode->NodeNum >= NumSUnits)
292 Topo.AddSUnitWithoutPredecessors(NewNode);
293 return NewNode;
294 }
295
296 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
297 /// need actual latency information but the hybrid scheduler does.
298 bool forceUnitLatencies() const override {
299 return !NeedLatency;
300 }
301};
302
303} // end anonymous namespace
304
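// Overall flow, for orientation: Schedule() (below) builds the SUnit graph
// with BuildSchedGraph(), primes AvailableQueue and the hazard recognizer, and
// then runs ListScheduleBottomUp(), which repeatedly calls
// PickNodeToScheduleBottomUp() and ScheduleNodeBottomUp() until both the
// available queue and the Interferences list are empty.
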
305static constexpr unsigned RegSequenceCost = 1;
306
307/// GetCostForDef - Looks up the register class and cost for a given definition.
308/// Typically this just means looking up the representative register class,
309/// but for untyped values (MVT::Untyped) it means inspecting the node's
310/// opcode to determine what register class is being generated.
311static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
312 const TargetLowering *TLI,
313 const TargetInstrInfo *TII,
314 const TargetRegisterInfo *TRI,
315 unsigned &RegClass, unsigned &Cost,
316 const MachineFunction &MF) {
317 MVT VT = RegDefPos.GetValue();
318
319 // Special handling for untyped values. These values can only come from
320 // the expansion of custom DAG-to-DAG patterns.
321 if (VT == MVT::Untyped) {
322 const SDNode *Node = RegDefPos.GetNode();
323
324 // Special handling for CopyFromReg of untyped values.
325 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
326 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
327 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
328 RegClass = RC->getID();
329 Cost = 1;
330 return;
331 }
332
333 unsigned Opcode = Node->getMachineOpcode();
334 if (Opcode == TargetOpcode::REG_SEQUENCE) {
335 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
336 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
337 RegClass = RC->getID();
338 Cost = RegSequenceCost;
339 return;
340 }
341
342 unsigned Idx = RegDefPos.GetIdx();
343 const MCInstrDesc &Desc = TII->get(Opcode);
344 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
345 assert(RC && "Not a valid register class");
346 RegClass = RC->getID();
347 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
348 // better way to determine it.
349 Cost = 1;
350 } else {
351 RegClass = TLI->getRepRegClassFor(VT)->getID();
352 Cost = TLI->getRepRegClassCostFor(VT);
353 }
354}
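
// Worked example (illustrative; the actual classes and costs are
// target-dependent): for a typed def such as an i32 value, RegClass and Cost
// come straight from TLI->getRepRegClassFor(MVT::i32) and
// TLI->getRepRegClassCostFor(MVT::i32); for an untyped REG_SEQUENCE def they
// come from the DstRCIdx operand and the fixed RegSequenceCost above.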
355
356/// Schedule - Schedule the DAG using list scheduling.
357void ScheduleDAGRRList::Schedule() {
358 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
359 << " '" << BB->getName() << "' **********\n");
360
361 CurCycle = 0;
362 IssueCount = 0;
363 MinAvailableCycle =
364 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
365 NumLiveRegs = 0;
366 // Allocate slots for each physical register, plus one for a special register
367 // to track the virtual resource of a calling sequence.
368 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
370 CallSeqEndForStart.clear();
371 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
372
373 // Build the scheduling graph.
374 BuildSchedGraph(nullptr);
375
376 LLVM_DEBUG(dump());
377 Topo.MarkDirty();
378
379 AvailableQueue->initNodes(SUnits);
380
381 HazardRec->Reset();
382
383 // Execute the actual scheduling loop.
384 ListScheduleBottomUp();
385
386 AvailableQueue->releaseState();
387
388 LLVM_DEBUG({
389 dbgs() << "*** Final schedule ***\n";
390 dumpSchedule();
391 dbgs() << '\n';
392 });
393}
394
395//===----------------------------------------------------------------------===//
396// Bottom-Up Scheduling
397//===----------------------------------------------------------------------===//
398
399/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
400/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
401void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
402 SUnit *PredSU = PredEdge->getSUnit();
403
404#ifndef NDEBUG
405 if (PredSU->NumSuccsLeft == 0) {
406 dbgs() << "*** Scheduling failed! ***\n";
407 dumpNode(*PredSU);
408 dbgs() << " has been released too many times!\n";
409 llvm_unreachable(nullptr);
410 }
411#endif
412 --PredSU->NumSuccsLeft;
413
414 if (!forceUnitLatencies()) {
415 // Updating predecessor's height. This is now the cycle when the
416 // predecessor can be scheduled without causing a pipeline stall.
417 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
418 }
419
420 // If all the node's successors are scheduled, this node is ready
421 // to be scheduled. Ignore the special EntrySU node.
422 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
423 PredSU->isAvailable = true;
424
425 unsigned Height = PredSU->getHeight();
426 if (Height < MinAvailableCycle)
427 MinAvailableCycle = Height;
428
429 if (isReady(PredSU)) {
430 AvailableQueue->push(PredSU);
431 }
432 // CapturePred and others may have left the node in the pending queue, avoid
433 // adding it twice.
434 else if (!PredSU->isPending) {
435 PredSU->isPending = true;
436 PendingQueue.push_back(PredSU);
437 }
438 }
439}
440
441/// IsChainDependent - Test if Outer is reachable from Inner through
442/// chain dependencies.
443static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
444 unsigned NestLevel,
445 const TargetInstrInfo *TII) {
446 SDNode *N = Outer;
447 while (true) {
448 if (N == Inner)
449 return true;
450 // For a TokenFactor, examine each operand. There may be multiple ways
451 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
452 // most nesting in order to ensure that we find the corresponding match.
453 if (N->getOpcode() == ISD::TokenFactor) {
454 for (const SDValue &Op : N->op_values())
455 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
456 return true;
457 return false;
458 }
459 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
460 if (N->isMachineOpcode()) {
461 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
462 ++NestLevel;
463 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
464 if (NestLevel == 0)
465 return false;
466 --NestLevel;
467 }
468 }
469 // Otherwise, find the chain and continue climbing.
470 for (const SDValue &Op : N->op_values())
471 if (Op.getValueType() == MVT::Other) {
472 N = Op.getNode();
473 goto found_chain_operand;
474 }
475 return false;
476 found_chain_operand:;
477 if (N->getOpcode() == ISD::EntryToken)
478 return false;
479 }
480}
481
482/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
483/// the corresponding (lowered) CALLSEQ_BEGIN node.
484///
485/// NestLevel and MaxNest are used in recursion to indicate the current level
486/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
487/// level seen so far.
488///
489/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
490/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
491static SDNode *
492FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
493 const TargetInstrInfo *TII) {
494 while (true) {
495 // For a TokenFactor, examine each operand. There may be multiple ways
496 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
497 // most nesting in order to ensure that we find the corresponding match.
498 if (N->getOpcode() == ISD::TokenFactor) {
499 SDNode *Best = nullptr;
500 unsigned BestMaxNest = MaxNest;
501 for (const SDValue &Op : N->op_values()) {
502 unsigned MyNestLevel = NestLevel;
503 unsigned MyMaxNest = MaxNest;
504 if (SDNode *New = FindCallSeqStart(Op.getNode(),
505 MyNestLevel, MyMaxNest, TII))
506 if (!Best || (MyMaxNest > BestMaxNest)) {
507 Best = New;
508 BestMaxNest = MyMaxNest;
509 }
510 }
511 assert(Best);
512 MaxNest = BestMaxNest;
513 return Best;
514 }
515 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
516 if (N->isMachineOpcode()) {
517 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
518 ++NestLevel;
519 MaxNest = std::max(MaxNest, NestLevel);
520 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
521 assert(NestLevel != 0);
522 --NestLevel;
523 if (NestLevel == 0)
524 return N;
525 }
526 }
527 // Otherwise, find the chain and continue climbing.
528 for (const SDValue &Op : N->op_values())
529 if (Op.getValueType() == MVT::Other) {
530 N = Op.getNode();
531 goto found_chain_operand;
532 }
533 return nullptr;
534 found_chain_operand:;
535 if (N->getOpcode() == ISD::EntryToken)
536 return nullptr;
537 }
538}
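
// Illustrative nesting example (schematic only):
//
//   t0: CALLSEQ_BEGIN             ; outer
//   t1:   CALLSEQ_BEGIN           ; inner
//   t2:   CALLSEQ_END             ; inner
//   t3: CALLSEQ_END               ; outer
//
// Starting FindCallSeqStart at t3 (a call-frame-destroy node), t3 itself
// raises NestLevel to 1, t2 raises it to 2, and t1 lowers it back to 1, so the
// walk keeps climbing the chain and only returns at t0, where NestLevel
// reaches 0. MaxNest records the deepest level seen so that the TokenFactor
// case can prefer the most deeply nested path.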
539
540/// Call ReleasePred for each predecessor, then update register live def/gen.
541/// Always update LiveRegDefs for a register dependence even if the current SU
542/// also defines the register. This effectively creates one large live range
543/// across a sequence of two-address nodes. This is important because the
544/// entire chain must be scheduled together. Example:
545///
546/// flags = (3) add
547/// flags = (2) addc flags
548/// flags = (1) addc flags
549///
550/// results in
551///
552/// LiveRegDefs[flags] = 3
553/// LiveRegGens[flags] = 1
554///
555/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
556/// interference on flags.
557void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
558 // Bottom up: release predecessors
559 for (SDep &Pred : SU->Preds) {
560 ReleasePred(SU, &Pred);
561 if (Pred.isAssignedRegDep()) {
562 // This is a physical register dependency and it's impossible or
563 // expensive to copy the register. Make sure nothing that can
564 // clobber the register is scheduled between the predecessor and
565 // this node.
566 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
567 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
568 "interference on register dependence");
569 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
570 if (!LiveRegGens[Pred.getReg()]) {
571 ++NumLiveRegs;
572 LiveRegGens[Pred.getReg()] = SU;
573 }
574 }
575 }
576
577 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
578 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
579 // these nodes, to prevent other calls from being interscheduled with them.
580 unsigned CallResource = TRI->getNumRegs();
581 if (!LiveRegDefs[CallResource])
582 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
583 if (Node->isMachineOpcode() &&
584 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
585 unsigned NestLevel = 0;
586 unsigned MaxNest = 0;
587 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
588 assert(N && "Must find call sequence start");
589
590 SUnit *Def = &SUnits[N->getNodeId()];
591 CallSeqEndForStart[Def] = SU;
592
593 ++NumLiveRegs;
594 LiveRegDefs[CallResource] = Def;
595 LiveRegGens[CallResource] = SU;
596 break;
597 }
598}
599
600/// Check to see if any of the pending instructions are ready to issue. If
601/// so, add them to the available queue.
602void ScheduleDAGRRList::ReleasePending() {
603 if (DisableSchedCycles) {
604 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
605 return;
606 }
607
608 // If the available queue is empty, it is safe to reset MinAvailableCycle.
609 if (AvailableQueue->empty())
610 MinAvailableCycle = std::numeric_limits<unsigned>::max();
611
612 // Check to see if any of the pending instructions are ready to issue. If
613 // so, add them to the available queue.
614 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
615 unsigned ReadyCycle = PendingQueue[i]->getHeight();
616 if (ReadyCycle < MinAvailableCycle)
617 MinAvailableCycle = ReadyCycle;
618
619 if (PendingQueue[i]->isAvailable) {
620 if (!isReady(PendingQueue[i]))
621 continue;
622 AvailableQueue->push(PendingQueue[i]);
623 }
624 PendingQueue[i]->isPending = false;
625 PendingQueue[i] = PendingQueue.back();
626 PendingQueue.pop_back();
627 --i; --e;
628 }
629}
630
631/// Move the scheduler state forward by the specified number of Cycles.
632void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
633 if (NextCycle <= CurCycle)
634 return;
635
636 IssueCount = 0;
637 AvailableQueue->setCurCycle(NextCycle);
638 if (!HazardRec->isEnabled()) {
639 // Bypass lots of virtual calls in case of long latency.
640 CurCycle = NextCycle;
641 }
642 else {
643 for (; CurCycle != NextCycle; ++CurCycle) {
644 HazardRec->RecedeCycle();
645 }
646 }
647 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
648 // available Q to release pending nodes at least once before popping.
649 ReleasePending();
650}
651
652/// Move the scheduler state forward until the specified node's dependents are
653/// ready and can be scheduled with no resource conflicts.
654void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
655 if (DisableSchedCycles)
656 return;
657
658 // FIXME: Nodes such as CopyFromReg probably should not advance the current
659 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
660 // has predecessors the cycle will be advanced when they are scheduled.
661 // But given the crude nature of modeling latency though such nodes, we
662 // currently need to treat these nodes like real instructions.
663 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
664
665 unsigned ReadyCycle = SU->getHeight();
666
667 // Bump CurCycle to account for latency. We assume the latency of other
668 // available instructions may be hidden by the stall (not a full pipe stall).
669 // This updates the hazard recognizer's cycle before reserving resources for
670 // this instruction.
671 AdvanceToCycle(ReadyCycle);
672
673 // Calls are scheduled in their preceding cycle, so don't conflict with
674 // hazards from instructions after the call. EmitNode will reset the
675 // scoreboard state before emitting the call.
676 if (SU->isCall)
677 return;
678
679 // FIXME: For resource conflicts in very long non-pipelined stages, we
680 // should probably skip ahead here to avoid useless scoreboard checks.
681 int Stalls = 0;
682 while (true) {
683 ScheduleHazardRecognizer::HazardType HT =
684 HazardRec->getHazardType(SU, -Stalls);
685
686 if (HT == ScheduleHazardRecognizer::NoHazard)
687 break;
688
689 ++Stalls;
690 }
691 AdvanceToCycle(CurCycle + Stalls);
692}
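
// Note that in this bottom-up scheduler "advancing" the cycle corresponds to
// receding the hazard recognizer (AdvanceToCycle calls RecedeCycle when the
// recognizer is enabled), and the loop above probes for stalls by querying
// getHazardType() at increasingly negative offsets until it reports NoHazard.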
693
694/// Record this SUnit in the HazardRecognizer.
695/// Does not update CurCycle.
696void ScheduleDAGRRList::EmitNode(SUnit *SU) {
697 if (!HazardRec->isEnabled())
698 return;
699
700 // Check for phys reg copy.
701 if (!SU->getNode())
702 return;
703
704 switch (SU->getNode()->getOpcode()) {
705 default:
706 assert(SU->getNode()->isMachineOpcode() &&
707 "This target-independent node should not be scheduled.");
708 break;
709 case ISD::MERGE_VALUES:
710 case ISD::TokenFactor:
711 case ISD::LIFETIME_START:
712 case ISD::LIFETIME_END:
713 case ISD::CopyToReg:
714 case ISD::CopyFromReg:
715 case ISD::EH_LABEL:
716 // Noops don't affect the scoreboard state. Copies are likely to be
717 // removed.
718 return;
719 case ISD::INLINEASM:
720 case ISD::INLINEASM_BR:
721 // For inline asm, clear the pipeline state.
722 HazardRec->Reset();
723 return;
724 }
725 if (SU->isCall) {
726 // Calls are scheduled with their preceding instructions. For bottom-up
727 // scheduling, clear the pipeline state before emitting.
728 HazardRec->Reset();
729 }
730
731 HazardRec->EmitInstruction(SU);
732}
733
734static void resetVRegCycle(SUnit *SU);
735
736/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
737/// count of its predecessors. If a predecessor pending count is zero, add it to
738/// the Available queue.
739void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
740 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
741 LLVM_DEBUG(dumpNode(*SU));
742
743#ifndef NDEBUG
744 if (CurCycle < SU->getHeight())
745 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
746 << "] pipeline stall!\n");
747#endif
748
749 // FIXME: Do not modify node height. It may interfere with
750 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
751 // node its ready cycle can aid heuristics, and after scheduling it can
752 // indicate the scheduled cycle.
753 SU->setHeightToAtLeast(CurCycle);
754
755 // Reserve resources for the scheduled instruction.
756 EmitNode(SU);
757
758 Sequence.push_back(SU);
759
760 AvailableQueue->scheduledNode(SU);
761
762 // If HazardRec is disabled, and each inst counts as one cycle, then
763 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
764 // PendingQueue for schedulers that implement HasReadyFilter.
765 if (!HazardRec->isEnabled() && AvgIPC < 2)
766 AdvanceToCycle(CurCycle + 1);
767
768 // Update liveness of predecessors before successors to avoid treating a
769 // two-address node as a live range def.
770 ReleasePredecessors(SU);
771
772 // Release all the implicit physical register defs that are live.
773 for (SDep &Succ : SU->Succs) {
774 // LiveRegDegs[Succ.getReg()] != SU when SU is a two-address node.
775 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
776 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
777 --NumLiveRegs;
778 LiveRegDefs[Succ.getReg()] = nullptr;
779 LiveRegGens[Succ.getReg()] = nullptr;
780 releaseInterferences(Succ.getReg());
781 }
782 }
783 // Release the special call resource dependence, if this is the beginning
784 // of a call.
785 unsigned CallResource = TRI->getNumRegs();
786 if (LiveRegDefs[CallResource] == SU)
787 for (const SDNode *SUNode = SU->getNode(); SUNode;
788 SUNode = SUNode->getGluedNode()) {
789 if (SUNode->isMachineOpcode() &&
790 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
791 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
792 --NumLiveRegs;
793 LiveRegDefs[CallResource] = nullptr;
794 LiveRegGens[CallResource] = nullptr;
795 releaseInterferences(CallResource);
796 }
797 }
798
799 resetVRegCycle(SU);
800
801 SU->isScheduled = true;
802
803 // Conditions under which the scheduler should eagerly advance the cycle:
804 // (1) No available instructions
805 // (2) All pipelines full, so available instructions must have hazards.
806 //
807 // If HazardRec is disabled, the cycle was pre-advanced before calling
808 // ReleasePredecessors. In that case, IssueCount should remain 0.
809 //
810 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
811 if (HazardRec->isEnabled() || AvgIPC > 1) {
812 if (SU->getNode() && SU->getNode()->isMachineOpcode())
813 ++IssueCount;
814 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
815 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
816 AdvanceToCycle(CurCycle + 1);
817 }
818}
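
// Illustration of the issue-count handling above, assuming the hazard
// recognizer is disabled: with the default -sched-avg-ipc=1 the cycle is
// advanced right after each node, before its predecessors are released; with a
// hypothetical -sched-avg-ipc=2, IssueCount is bumped for each machine node
// and the cycle only advances once two such nodes have been issued.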
819
820/// CapturePred - This does the opposite of ReleasePred. Since SU is being
821/// unscheduled, increase the succ left count of its predecessors. Remove
822/// them from AvailableQueue if necessary.
823void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
824 SUnit *PredSU = PredEdge->getSUnit();
825 if (PredSU->isAvailable) {
826 PredSU->isAvailable = false;
827 if (!PredSU->isPending)
828 AvailableQueue->remove(PredSU);
829 }
830
831 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
832 "NumSuccsLeft will overflow!");
833 ++PredSU->NumSuccsLeft;
834}
835
836/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
837/// state and its predecessors' states to reflect the change.
838void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
839 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
840 LLVM_DEBUG(dumpNode(*SU));
841
842 for (SDep &Pred : SU->Preds) {
843 CapturePred(&Pred);
844 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
845 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
846 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
847 "Physical register dependency violated?");
848 --NumLiveRegs;
849 LiveRegDefs[Pred.getReg()] = nullptr;
850 LiveRegGens[Pred.getReg()] = nullptr;
851 releaseInterferences(Pred.getReg());
852 }
853 }
854
855 // Reclaim the special call resource dependence, if this is the beginning
856 // of a call.
857 unsigned CallResource = TRI->getNumRegs();
858 for (const SDNode *SUNode = SU->getNode(); SUNode;
859 SUNode = SUNode->getGluedNode()) {
860 if (SUNode->isMachineOpcode() &&
861 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
862 SUnit *SeqEnd = CallSeqEndForStart[SU];
863 assert(SeqEnd && "Call sequence start/end must be known");
864 assert(!LiveRegDefs[CallResource]);
865 assert(!LiveRegGens[CallResource]);
866 ++NumLiveRegs;
867 LiveRegDefs[CallResource] = SU;
868 LiveRegGens[CallResource] = SeqEnd;
869 }
870 }
871
872 // Release the special call resource dependence, if this is the end
873 // of a call.
874 if (LiveRegGens[CallResource] == SU)
875 for (const SDNode *SUNode = SU->getNode(); SUNode;
876 SUNode = SUNode->getGluedNode()) {
877 if (SUNode->isMachineOpcode() &&
878 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
879 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
880 assert(LiveRegDefs[CallResource]);
881 assert(LiveRegGens[CallResource]);
882 --NumLiveRegs;
883 LiveRegDefs[CallResource] = nullptr;
884 LiveRegGens[CallResource] = nullptr;
885 releaseInterferences(CallResource);
886 }
887 }
888
889 for (auto &Succ : SU->Succs) {
890 if (Succ.isAssignedRegDep()) {
891 auto Reg = Succ.getReg();
892 if (!LiveRegDefs[Reg])
893 ++NumLiveRegs;
894 // This becomes the nearest def. Note that an earlier def may still be
895 // pending if this is a two-address node.
896 LiveRegDefs[Reg] = SU;
897
898 // Update LiveRegGens only if it was empty before this unscheduling.
899 // This avoids incorrectly overwriting an entry set by a previous run.
900 if (!LiveRegGens[Reg]) {
901 // Find the successor with the lowest height.
902 LiveRegGens[Reg] = Succ.getSUnit();
903 for (auto &Succ2 : SU->Succs) {
904 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
905 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
906 LiveRegGens[Reg] = Succ2.getSUnit();
907 }
908 }
909 }
910 }
911 if (SU->getHeight() < MinAvailableCycle)
912 MinAvailableCycle = SU->getHeight();
913
914 SU->setHeightDirty();
915 SU->isScheduled = false;
916 SU->isAvailable = true;
917 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
918 // Don't make available until backtracking is complete.
919 SU->isPending = true;
920 PendingQueue.push_back(SU);
921 }
922 else {
923 AvailableQueue->push(SU);
924 }
925 AvailableQueue->unscheduledNode(SU);
926}
927
928/// After backtracking, the hazard checker needs to be restored to a state
929/// corresponding to the current cycle.
930void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
931 HazardRec->Reset();
932
933 unsigned LookAhead = std::min((unsigned)Sequence.size(),
934 HazardRec->getMaxLookAhead());
935 if (LookAhead == 0)
936 return;
937
938 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
939 unsigned HazardCycle = (*I)->getHeight();
940 for (auto E = Sequence.end(); I != E; ++I) {
941 SUnit *SU = *I;
942 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
943 HazardRec->RecedeCycle();
944 }
945 EmitNode(SU);
946 }
947}
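
// In other words, the recognizer is rebuilt by replaying the last
// getMaxLookAhead() entries of Sequence: reset the scoreboard, then walk those
// nodes in order, receding one cycle whenever the next node's height exceeds
// the current replay cycle and re-emitting each node, so that later hazard
// queries see the same state as before backtracking.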
948
949/// BacktrackBottomUp - Unschedule nodes from the bottom of the schedule, up
950/// to and including BtSU, so that SU can be scheduled.
951void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
952 SUnit *OldSU = Sequence.back();
953 while (true) {
954 Sequence.pop_back();
955 // FIXME: use ready cycle instead of height
956 CurCycle = OldSU->getHeight();
957 UnscheduleNodeBottomUp(OldSU);
958 AvailableQueue->setCurCycle(CurCycle);
959 if (OldSU == BtSU)
960 break;
961 OldSU = Sequence.back();
962 }
963
964 assert(!SU->isSucc(OldSU) && "Something is wrong!");
965
966 RestoreHazardCheckerBottomUp();
967
968 ReleasePending();
969
970 ++NumBacktracks;
971}
972
973static bool isOperandOf(const SUnit *SU, SDNode *N) {
974 for (const SDNode *SUNode = SU->getNode(); SUNode;
975 SUNode = SUNode->getGluedNode()) {
976 if (SUNode->isOperandOf(N))
977 return true;
978 }
979 return false;
980}
981
982/// TryUnfoldSU - Attempt to unfold a load that was folded into this node.
983SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
984 SDNode *N = SU->getNode();
985 // Use while over if to ease fall through.
986 SmallVector<SDNode *, 2> NewNodes;
987 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
988 return nullptr;
989
990 // Unfolding an x86 DEC64m operation results in a store, dec, and load,
991 // which can't be handled here, so quit.
992 if (NewNodes.size() == 3)
993 return nullptr;
994
995 assert(NewNodes.size() == 2 && "Expected a load folding node!");
996
997 N = NewNodes[1];
998 SDNode *LoadNode = NewNodes[0];
999 unsigned NumVals = N->getNumValues();
1000 unsigned OldNumVals = SU->getNode()->getNumValues();
1001
1002 // LoadNode may already exist. This can happen when there is another
1003 // load from the same location and producing the same type of value
1004 // but it has different alignment or volatility.
1005 bool isNewLoad = true;
1006 SUnit *LoadSU;
1007 if (LoadNode->getNodeId() != -1) {
1008 LoadSU = &SUnits[LoadNode->getNodeId()];
1009 // If LoadSU has already been scheduled, we should clone it but
1010 // this would negate the benefit to unfolding so just return SU.
1011 if (LoadSU->isScheduled)
1012 return SU;
1013 isNewLoad = false;
1014 } else {
1015 LoadSU = CreateNewSUnit(LoadNode);
1016 LoadNode->setNodeId(LoadSU->NodeNum);
1017
1018 InitNumRegDefsLeft(LoadSU);
1019 computeLatency(LoadSU);
1020 }
1021
1022 bool isNewN = true;
1023 SUnit *NewSU;
1024 // This can only happen when isNewLoad is false.
1025 if (N->getNodeId() != -1) {
1026 NewSU = &SUnits[N->getNodeId()];
1027 // If NewSU has already been scheduled, we need to clone it, but this
1028 // negates the benefit to unfolding so just return SU.
1029 if (NewSU->isScheduled) {
1030 return SU;
1031 }
1032 isNewN = false;
1033 } else {
1034 NewSU = CreateNewSUnit(N);
1035 N->setNodeId(NewSU->NodeNum);
1036
1037 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1038 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1039 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1040 NewSU->isTwoAddress = true;
1041 break;
1042 }
1043 }
1044 if (MCID.isCommutable())
1045 NewSU->isCommutable = true;
1046
1047 InitNumRegDefsLeft(NewSU);
1048 computeLatency(NewSU);
1049 }
1050
1051 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1052
1053 // Now that we are committed to unfolding replace DAG Uses.
1054 for (unsigned i = 0; i != NumVals; ++i)
1055 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1056 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1057 SDValue(LoadNode, 1));
1058
1059 // Record all the edges to and from the old SU, by category.
1060 SmallVector<SDep, 4> ChainPreds;
1061 SmallVector<SDep, 4> ChainSuccs;
1062 SmallVector<SDep, 4> LoadPreds;
1063 SmallVector<SDep, 4> NodePreds;
1064 SmallVector<SDep, 4> NodeSuccs;
1065 for (SDep &Pred : SU->Preds) {
1066 if (Pred.isCtrl())
1067 ChainPreds.push_back(Pred);
1068 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1069 LoadPreds.push_back(Pred);
1070 else
1071 NodePreds.push_back(Pred);
1072 }
1073 for (SDep &Succ : SU->Succs) {
1074 if (Succ.isCtrl())
1075 ChainSuccs.push_back(Succ);
1076 else
1077 NodeSuccs.push_back(Succ);
1078 }
1079
1080 // Now assign edges to the newly-created nodes.
1081 for (const SDep &Pred : ChainPreds) {
1082 RemovePred(SU, Pred);
1083 if (isNewLoad)
1084 AddPredQueued(LoadSU, Pred);
1085 }
1086 for (const SDep &Pred : LoadPreds) {
1087 RemovePred(SU, Pred);
1088 if (isNewLoad)
1089 AddPredQueued(LoadSU, Pred);
1090 }
1091 for (const SDep &Pred : NodePreds) {
1092 RemovePred(SU, Pred);
1093 AddPredQueued(NewSU, Pred);
1094 }
1095 for (SDep &D : NodeSuccs) {
1096 SUnit *SuccDep = D.getSUnit();
1097 D.setSUnit(SU);
1098 RemovePred(SuccDep, D);
1099 D.setSUnit(NewSU);
1100 AddPredQueued(SuccDep, D);
1101 // Balance register pressure.
1102 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1103 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1104 --NewSU->NumRegDefsLeft;
1105 }
1106 for (SDep &D : ChainSuccs) {
1107 SUnit *SuccDep = D.getSUnit();
1108 D.setSUnit(SU);
1109 RemovePred(SuccDep, D);
1110 if (isNewLoad) {
1111 D.setSUnit(LoadSU);
1112 AddPredQueued(SuccDep, D);
1113 }
1114 }
1115
1116 // Add a data dependency to reflect that NewSU reads the value defined
1117 // by LoadSU.
1118 SDep D(LoadSU, SDep::Data, 0);
1119 D.setLatency(LoadSU->Latency);
1120 AddPredQueued(NewSU, D);
1121
1122 if (isNewLoad)
1123 AvailableQueue->addNode(LoadSU);
1124 if (isNewN)
1125 AvailableQueue->addNode(NewSU);
1126
1127 ++NumUnfolds;
1128
1129 if (NewSU->NumSuccsLeft == 0)
1130 NewSU->isAvailable = true;
1131
1132 return NewSU;
1133}
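
// Typical unfolding example (illustrative; the exact opcodes are
// target-specific): an x86 instruction with a folded load such as
//
//   %r = ADD32rm %a, <mem>
//
// is split by unfoldMemoryOperand() into the load plus the register form of
// the operation,
//
//   %t = MOV32rm <mem>
//   %r = ADD32rr %a, %t
//
// which gives the scheduler an extra point at which the live range can be
// broken.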
1134
1135/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1136/// successors to the newly created node.
1137SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1138 SDNode *N = SU->getNode();
1139 if (!N)
1140 return nullptr;
1141
1142 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1143 LLVM_DEBUG(dumpNode(*SU));
1144
1145 if (N->getGluedNode() &&
1146 !TII->canCopyGluedNodeDuringSchedule(N)) {
1147 LLVM_DEBUG(
1148 dbgs()
1149 << "Giving up because it has incoming glue and the target does not "
1150 "want to copy it\n");
1151 return nullptr;
1152 }
1153
1154 SUnit *NewSU;
1155 bool TryUnfold = false;
1156 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1157 MVT VT = N->getSimpleValueType(i);
1158 if (VT == MVT::Glue) {
1159 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1160 return nullptr;
1161 } else if (VT == MVT::Other)
1162 TryUnfold = true;
1163 }
1164 for (const SDValue &Op : N->op_values()) {
1165 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1166 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1167 LLVM_DEBUG(
1168 dbgs() << "Giving up because it one of the operands is glue and "
1169 "the target does not want to copy it\n");
1170 return nullptr;
1171 }
1172 }
1173
1174 // If possible unfold instruction.
1175 if (TryUnfold) {
1176 SUnit *UnfoldSU = TryUnfoldSU(SU);
1177 if (!UnfoldSU)
1178 return nullptr;
1179 SU = UnfoldSU;
1180 N = SU->getNode();
1181 // If this can be scheduled don't bother duplicating and just return
1182 if (SU->NumSuccsLeft == 0)
1183 return SU;
1184 }
1185
1186 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1187 NewSU = CreateClone(SU);
1188
1189 // New SUnit has the exact same predecessors.
1190 for (SDep &Pred : SU->Preds)
1191 if (!Pred.isArtificial())
1192 AddPredQueued(NewSU, Pred);
1193
1194 // Make sure the clone comes after the original. (InstrEmitter assumes
1195 // this ordering.)
1196 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1197
1198 // Only copy scheduled successors. Cut them from old node's successor
1199 // list and move them over.
1200 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1201 for (SDep &Succ : SU->Succs) {
1202 if (Succ.isArtificial())
1203 continue;
1204 SUnit *SuccSU = Succ.getSUnit();
1205 if (SuccSU->isScheduled) {
1206 SDep D = Succ;
1207 D.setSUnit(NewSU);
1208 AddPredQueued(SuccSU, D);
1209 D.setSUnit(SU);
1210 DelDeps.emplace_back(SuccSU, D);
1211 }
1212 }
1213 for (const auto &[DelSU, DelD] : DelDeps)
1214 RemovePred(DelSU, DelD);
1215
1216 AvailableQueue->updateNode(SU);
1217 AvailableQueue->addNode(NewSU);
1218
1219 ++NumDups;
1220 return NewSU;
1221}
1222
1223/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1224/// scheduled successors of the given SUnit to the last copy.
1225void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1226 const TargetRegisterClass *DestRC,
1227 const TargetRegisterClass *SrcRC,
1228 SmallVectorImpl<SUnit*> &Copies) {
1229 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1230 CopyFromSU->CopySrcRC = SrcRC;
1231 CopyFromSU->CopyDstRC = DestRC;
1232
1233 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1234 CopyToSU->CopySrcRC = DestRC;
1235 CopyToSU->CopyDstRC = SrcRC;
1236
1237 // Only copy scheduled successors. Cut them from old node's successor
1238 // list and move them over.
1239 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1240 for (SDep &Succ : SU->Succs) {
1241 if (Succ.isArtificial())
1242 continue;
1243 SUnit *SuccSU = Succ.getSUnit();
1244 if (SuccSU->isScheduled) {
1245 SDep D = Succ;
1246 D.setSUnit(CopyToSU);
1247 AddPredQueued(SuccSU, D);
1248 DelDeps.emplace_back(SuccSU, Succ);
1249 }
1250 else {
1251 // Avoid scheduling the def-side copy before other successors. Otherwise,
1252 // we could introduce another physreg interference on the copy and
1253 // continue inserting copies indefinitely.
1254 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1255 }
1256 }
1257 for (const auto &[DelSU, DelD] : DelDeps)
1258 RemovePred(DelSU, DelD);
1259
1260 SDep FromDep(SU, SDep::Data, Reg);
1261 FromDep.setLatency(SU->Latency);
1262 AddPredQueued(CopyFromSU, FromDep);
1263 SDep ToDep(CopyFromSU, SDep::Data, 0);
1264 ToDep.setLatency(CopyFromSU->Latency);
1265 AddPredQueued(CopyToSU, ToDep);
1266
1267 AvailableQueue->updateNode(SU);
1268 AvailableQueue->addNode(CopyFromSU);
1269 AvailableQueue->addNode(CopyToSU);
1270 Copies.push_back(CopyFromSU);
1271 Copies.push_back(CopyToSU);
1272
1273 ++NumPRCopies;
1274}
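
// The net effect is a pair of back-to-back copies: CopyFromSU moves the value
// out of the contended physical register class (SrcRC) into the cross-copy
// class (DestRC), and CopyToSU moves it back into SrcRC for the
// already-scheduled users, so the original def no longer needs to stay live
// across the interfering node.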
1275
1276/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1277/// definition of the specified node.
1278/// FIXME: Move to SelectionDAG?
1279static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1280 const TargetInstrInfo *TII) {
1281 unsigned NumRes;
1282 if (N->getOpcode() == ISD::CopyFromReg) {
1283 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1284 NumRes = 1;
1285 } else {
1286 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1287 assert(!MCID.implicit_defs().empty() &&
1288 "Physical reg def must be in implicit def list!");
1289 NumRes = MCID.getNumDefs();
1290 for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1291 if (Reg == ImpDef)
1292 break;
1293 ++NumRes;
1294 }
1295 }
1296 return N->getSimpleValueType(NumRes);
1297}
1298
1299/// CheckForLiveRegDef - Check whether the specified register def of the
1300/// specified SUnit clobbers any "live" registers, and record them in LRegs.
1301static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
1302 SmallSet<unsigned, 4> &RegAdded,
1303 SmallVectorImpl<unsigned> &LRegs,
1304 const TargetRegisterInfo *TRI,
1305 const SDNode *Node = nullptr) {
1306 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1307
1308 // Check if Ref is live.
1309 if (!LiveRegDefs[*AliasI]) continue;
1310
1311 // Allow multiple uses of the same def.
1312 if (LiveRegDefs[*AliasI] == SU) continue;
1313
1314 // Allow multiple uses of the same def when they come from the same node.
1315 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1316 continue;
1317
1318 // Add Reg to the set of interfering live regs.
1319 if (RegAdded.insert(*AliasI).second) {
1320 LRegs.push_back(*AliasI);
1321 }
1322 }
1323}
1324
1325/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1326/// by RegMask, and add them to LRegs.
1327static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1328 ArrayRef<SUnit*> LiveRegDefs,
1329 SmallSet<unsigned, 4> &RegAdded,
1330 SmallVectorImpl<unsigned> &LRegs) {
1331 // Look at all live registers. Skip Reg0 and the special CallResource.
1332 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1333 if (!LiveRegDefs[i]) continue;
1334 if (LiveRegDefs[i] == SU) continue;
1335 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1336 if (RegAdded.insert(i).second)
1337 LRegs.push_back(i);
1338 }
1339}
1340
1341/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1342static const uint32_t *getNodeRegMask(const SDNode *N) {
1343 for (const SDValue &Op : N->op_values())
1344 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1345 return RegOp->getRegMask();
1346 return nullptr;
1347}
1348
1349/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1350/// scheduling of the given node to satisfy live physical register dependencies.
1351/// If the specific node is the last one that's available to schedule, do
1352/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1353bool ScheduleDAGRRList::
1354DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1355 if (NumLiveRegs == 0)
1356 return false;
1357
1358 SmallSet<unsigned, 4> RegAdded;
1359 // If this node would clobber any "live" register, then it's not ready.
1360 //
1361 // If SU is the currently live definition of the same register that it uses,
1362 // then we are free to schedule it.
1363 for (SDep &Pred : SU->Preds) {
1364 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1365 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1366 RegAdded, LRegs, TRI);
1367 }
1368
1369 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1370 if (Node->getOpcode() == ISD::INLINEASM ||
1371 Node->getOpcode() == ISD::INLINEASM_BR) {
1372 // Inline asm can clobber physical defs.
1373 unsigned NumOps = Node->getNumOperands();
1374 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1375 --NumOps; // Ignore the glue operand.
1376
1377 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1378 unsigned Flags =
1379 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1380 unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
1381
1382 ++i; // Skip the ID value.
1383 if (InlineAsm::isRegDefKind(Flags) ||
1384 InlineAsm::isRegDefEarlyClobberKind(Flags) ||
1385 InlineAsm::isClobberKind(Flags)) {
1386 // Check for def of register or earlyclobber register.
1387 for (; NumVals; --NumVals, ++i) {
1388 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1389 if (Reg.isPhysical())
1390 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1391 }
1392 } else
1393 i += NumVals;
1394 }
1395 continue;
1396 }
1397
1398 if (Node->getOpcode() == ISD::CopyToReg) {
1399 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1400 if (Reg.isPhysical()) {
1401 SDNode *SrcNode = Node->getOperand(2).getNode();
1402 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
1403 SrcNode);
1404 }
1405 }
1406
1407 if (!Node->isMachineOpcode())
1408 continue;
1409 // If we're in the middle of scheduling a call, don't begin scheduling
1410 // another call. Also, don't allow any physical registers to be live across
1411 // the call.
1412 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1413 // Check the special calling-sequence resource.
1414 unsigned CallResource = TRI->getNumRegs();
1415 if (LiveRegDefs[CallResource]) {
1416 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1417 while (SDNode *Glued = Gen->getGluedNode())
1418 Gen = Glued;
1419 if (!IsChainDependent(Gen, Node, 0, TII) &&
1420 RegAdded.insert(CallResource).second)
1421 LRegs.push_back(CallResource);
1422 }
1423 }
1424 if (const uint32_t *RegMask = getNodeRegMask(Node))
1425 CheckForLiveRegDefMasked(SU, RegMask,
1426 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1427 RegAdded, LRegs);
1428
1429 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1430 if (MCID.hasOptionalDef()) {
1431 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1432 // This operand can be either a def of CPSR, if the S bit is set; or a use
1433 // of %noreg. When the OptionalDef is set to a valid register, we need to
1434 // handle it in the same way as an ImplicitDef.
1435 for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1436 if (MCID.operands()[i].isOptionalDef()) {
1437 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1438 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1439 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1440 }
1441 }
1442 for (MCPhysReg Reg : MCID.implicit_defs())
1443 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1444 }
1445
1446 return !LRegs.empty();
1447}
1448
1449void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1450 // Add the nodes that aren't ready back onto the available list.
1451 for (unsigned i = Interferences.size(); i > 0; --i) {
1452 SUnit *SU = Interferences[i-1];
1453 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1454 if (Reg) {
1455 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1456 if (!is_contained(LRegs, Reg))
1457 continue;
1458 }
1459 SU->isPending = false;
1460 // The interfering node may no longer be available due to backtracking.
1461 // Furthermore, it may have been made available again, in which case it is
1462 // now already in the AvailableQueue.
1463 if (SU->isAvailable && !SU->NodeQueueId) {
1464 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1465 AvailableQueue->push(SU);
1466 }
1467 if (i < Interferences.size())
1468 Interferences[i-1] = Interferences.back();
1469 Interferences.pop_back();
1470 LRegsMap.erase(LRegsPos);
1471 }
1472}
1473
1474/// Return a node that can be scheduled in this cycle. Requirements:
1475/// (1) Ready: latency has been satisfied
1476/// (2) No Hazards: resources are available
1477/// (3) No Interferences: may unschedule to break register interferences.
1478SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1479 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1480 auto FindAvailableNode = [&]() {
1481 while (CurSU) {
1482 SmallVector<unsigned, 4> LRegs;
1483 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1484 break;
1485 LLVM_DEBUG(dbgs() << " Interfering reg ";
1486 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1487 else dbgs() << printReg(LRegs[0], TRI);
1488 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1489 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1490 if (LRegsInserted) {
1491 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1492 Interferences.push_back(CurSU);
1493 }
1494 else {
1495 assert(CurSU->isPending && "Interferences are pending");
1496 // Update the interference with current live regs.
1497 LRegsIter->second = LRegs;
1498 }
1499 CurSU = AvailableQueue->pop();
1500 }
1501 };
1502 FindAvailableNode();
1503 if (CurSU)
1504 return CurSU;
1505
1506 // We query the topological order in the loop body, so make sure outstanding
1507 // updates are applied before entering it (we only enter the loop if there
1508 // are some interferences). If we make changes to the ordering, we exit
1509 // the loop.
1510
1511 // All candidates are delayed due to live physical reg dependencies.
1512 // Try backtracking, code duplication, or inserting cross class copies
1513 // to resolve it.
1514 for (SUnit *TrySU : Interferences) {
1515 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1516
1517 // Try unscheduling up to the point where it's safe to schedule
1518 // this node.
1519 SUnit *BtSU = nullptr;
1520 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1521 for (unsigned Reg : LRegs) {
1522 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1523 BtSU = LiveRegGens[Reg];
1524 LiveCycle = BtSU->getHeight();
1525 }
1526 }
1527 if (!WillCreateCycle(TrySU, BtSU)) {
1528 // BacktrackBottomUp mutates Interferences!
1529 BacktrackBottomUp(TrySU, BtSU);
1530
1531 // Force the current node to be scheduled before the node that
1532 // requires the physical reg dep.
1533 if (BtSU->isAvailable) {
1534 BtSU->isAvailable = false;
1535 if (!BtSU->isPending)
1536 AvailableQueue->remove(BtSU);
1537 }
1538 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1539 << ") to SU(" << TrySU->NodeNum << ")\n");
1540 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1541
1542 // If one or more successors has been unscheduled, then the current
1543 // node is no longer available.
1544 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1545 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1546 CurSU = AvailableQueue->pop();
1547 } else {
1548 LLVM_DEBUG(dbgs() << "TrySU available\n");
1549 // Available and in AvailableQueue
1550 AvailableQueue->remove(TrySU);
1551 CurSU = TrySU;
1552 }
1553 FindAvailableNode();
1554 // Interferences has been mutated. We must break.
1555 break;
1556 }
1557 }
1558
1559 if (!CurSU) {
1560 // Can't backtrack. If it's too expensive to copy the value, then try to
1561 // duplicate the nodes that produce these "too expensive to copy"
1562 // values to break the dependency. In case even that doesn't work,
1563 // insert cross class copies.
1564 // If it's not too expensive, i.e. cost != -1, issue copies.
1565 SUnit *TrySU = Interferences[0];
1566 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1567 assert(LRegs.size() == 1 && "Can't handle this yet!");
1568 unsigned Reg = LRegs[0];
1569 SUnit *LRDef = LiveRegDefs[Reg];
1570 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1571 const TargetRegisterClass *RC =
1572 TRI->getMinimalPhysRegClass(Reg, VT);
1573 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1574
1575 // If the cross copy register class is the same as RC, then it must be
1576 // possible to copy the value directly. Do not try to duplicate the def.
1577 // If the cross copy register class is not the same as RC, then it's
1578 // possible to copy the value but it requires cross register class copies
1579 // and it is expensive.
1580 // If cross copy register class is null, then it's not possible to copy
1581 // the value at all.
1582 SUnit *NewDef = nullptr;
1583 if (DestRC != RC) {
1584 NewDef = CopyAndMoveSuccessors(LRDef);
1585 if (!DestRC && !NewDef)
1586 report_fatal_error("Can't handle live physical register dependency!");
1587 }
1588 if (!NewDef) {
1589 // Issue copies, these can be expensive cross register class copies.
1590 SmallVector<SUnit*, 2> Copies;
1591 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1592 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1593 << " to SU #" << Copies.front()->NodeNum << "\n");
1594 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1595 NewDef = Copies.back();
1596 }
1597
1598 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1599 << " to SU #" << TrySU->NodeNum << "\n");
1600 LiveRegDefs[Reg] = NewDef;
1601 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1602 TrySU->isAvailable = false;
1603 CurSU = NewDef;
1604 }
1605 assert(CurSU && "Unable to resolve live physical register dependencies!");
1606 return CurSU;
1607}
1608
1609/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1610/// schedulers.
1611void ScheduleDAGRRList::ListScheduleBottomUp() {
1612 // Release any predecessors of the special Exit node.
1613 ReleasePredecessors(&ExitSU);
1614
1615 // Add root to Available queue.
1616 if (!SUnits.empty()) {
1617 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1618 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1619 RootSU->isAvailable = true;
1620 AvailableQueue->push(RootSU);
1621 }
1622
1623 // While Available queue is not empty, grab the node with the highest
1624 // priority. If it is not ready put it back. Schedule the node.
1625 Sequence.reserve(SUnits.size());
1626 while (!AvailableQueue->empty() || !Interferences.empty()) {
1627 LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1628 AvailableQueue->dump(this));
1629
1630 // Pick the best node to schedule taking all constraints into
1631 // consideration.
1632 SUnit *SU = PickNodeToScheduleBottomUp();
1633
1634 AdvancePastStalls(SU);
1635
1636 ScheduleNodeBottomUp(SU);
1637
1638 while (AvailableQueue->empty() && !PendingQueue.empty()) {
1639 // Advance the cycle to free resources. Skip ahead to the next ready SU.
1640 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1641 "MinAvailableCycle uninitialized");
1642 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1643 }
1644 }
1645
1646 // Reverse the order since this is a bottom-up scheduler.
1647 std::reverse(Sequence.begin(), Sequence.end());
1648
1649#ifndef NDEBUG
1650 VerifyScheduledSequence(/*isBottomUp=*/true);
1651#endif
1652}
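// Note: each pass of the loop above schedules the SUnit chosen by
// PickNodeToScheduleBottomUp; when the available queue then drains while
// SUnits are still pending, the cycle is advanced to MinAvailableCycle (at
// least one past CurCycle) so the earliest pending node becomes ready.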
1653
1654namespace {
1655
1656class RegReductionPQBase;
1657
1658struct queue_sort {
1659 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1660};
1661
1662#ifndef NDEBUG
1663template<class SF>
1664struct reverse_sort : public queue_sort {
1665 SF &SortFunc;
1666
1667 reverse_sort(SF &sf) : SortFunc(sf) {}
1668
1669 bool operator()(SUnit* left, SUnit* right) const {
1670 // reverse left/right rather than simply !SortFunc(left, right)
1671 // to expose different paths in the comparison logic.
1672 return SortFunc(right, left);
1673 }
1674};
1675#endif // NDEBUG
1676
1677/// bu_ls_rr_sort - Priority function for bottom up register pressure
 1678/// reduction scheduler.
1679struct bu_ls_rr_sort : public queue_sort {
1680 enum {
1681 IsBottomUp = true,
1682 HasReadyFilter = false
1683 };
1684
1685 RegReductionPQBase *SPQ;
1686
1687 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1688
1689 bool operator()(SUnit* left, SUnit* right) const;
1690};
1691
1692// src_ls_rr_sort - Priority function for source order scheduler.
1693struct src_ls_rr_sort : public queue_sort {
1694 enum {
1695 IsBottomUp = true,
1696 HasReadyFilter = false
1697 };
1698
1699 RegReductionPQBase *SPQ;
1700
1701 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1702
1703 bool operator()(SUnit* left, SUnit* right) const;
1704};
1705
1706// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1707struct hybrid_ls_rr_sort : public queue_sort {
1708 enum {
1709 IsBottomUp = true,
1710 HasReadyFilter = false
1711 };
1712
1713 RegReductionPQBase *SPQ;
1714
1715 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1716
1717 bool isReady(SUnit *SU, unsigned CurCycle) const;
1718
1719 bool operator()(SUnit* left, SUnit* right) const;
1720};
1721
1722// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1723// scheduler.
1724struct ilp_ls_rr_sort : public queue_sort {
1725 enum {
1726 IsBottomUp = true,
1727 HasReadyFilter = false
1728 };
1729
1730 RegReductionPQBase *SPQ;
1731
1732 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1733
1734 bool isReady(SUnit *SU, unsigned CurCycle) const;
1735
1736 bool operator()(SUnit* left, SUnit* right) const;
1737};
1738
1739class RegReductionPQBase : public SchedulingPriorityQueue {
1740protected:
1741 std::vector<SUnit *> Queue;
1742 unsigned CurQueueId = 0;
1743 bool TracksRegPressure;
1744 bool SrcOrder;
1745
1746 // SUnits - The SUnits for the current graph.
1747 std::vector<SUnit> *SUnits;
1748
1749 MachineFunction &MF;
1750 const TargetInstrInfo *TII;
1751 const TargetRegisterInfo *TRI;
1752 const TargetLowering *TLI;
1753 ScheduleDAGRRList *scheduleDAG = nullptr;
1754
1755 // SethiUllmanNumbers - The SethiUllman number for each node.
1756 std::vector<unsigned> SethiUllmanNumbers;
1757
1758 /// RegPressure - Tracking current reg pressure per register class.
1759 std::vector<unsigned> RegPressure;
1760
1761 /// RegLimit - Tracking the number of allocatable registers per register
1762 /// class.
1763 std::vector<unsigned> RegLimit;
1764
1765public:
1766 RegReductionPQBase(MachineFunction &mf,
1767 bool hasReadyFilter,
1768 bool tracksrp,
1769 bool srcorder,
1770 const TargetInstrInfo *tii,
1771 const TargetRegisterInfo *tri,
1772 const TargetLowering *tli)
1773 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1774 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1775 if (TracksRegPressure) {
1776 unsigned NumRC = TRI->getNumRegClasses();
1777 RegLimit.resize(NumRC);
1778 RegPressure.resize(NumRC);
1779 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1780 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1781 for (const TargetRegisterClass *RC : TRI->regclasses())
1782 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1783 }
1784 }
1785
1786 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1787 scheduleDAG = scheduleDag;
1788 }
1789
1790 ScheduleHazardRecognizer* getHazardRec() {
1791 return scheduleDAG->getHazardRec();
1792 }
1793
1794 void initNodes(std::vector<SUnit> &sunits) override;
1795
1796 void addNode(const SUnit *SU) override;
1797
1798 void updateNode(const SUnit *SU) override;
1799
1800 void releaseState() override {
1801 SUnits = nullptr;
1802 SethiUllmanNumbers.clear();
1803 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1804 }
1805
1806 unsigned getNodePriority(const SUnit *SU) const;
1807
1808 unsigned getNodeOrdering(const SUnit *SU) const {
1809 if (!SU->getNode()) return 0;
1810
1811 return SU->getNode()->getIROrder();
1812 }
1813
1814 bool empty() const override { return Queue.empty(); }
1815
1816 void push(SUnit *U) override {
1817 assert(!U->NodeQueueId && "Node in the queue already");
1818 U->NodeQueueId = ++CurQueueId;
1819 Queue.push_back(U);
1820 }
1821
1822 void remove(SUnit *SU) override {
1823 assert(!Queue.empty() && "Queue is empty!");
1824 assert(SU->NodeQueueId != 0 && "Not in queue!");
1825 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1826 if (I != std::prev(Queue.end()))
1827 std::swap(*I, Queue.back());
1828 Queue.pop_back();
1829 SU->NodeQueueId = 0;
1830 }
1831
1832 bool tracksRegPressure() const override { return TracksRegPressure; }
1833
1834 void dumpRegPressure() const;
1835
1836 bool HighRegPressure(const SUnit *SU) const;
1837
1838 bool MayReduceRegPressure(SUnit *SU) const;
1839
1840 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1841
1842 void scheduledNode(SUnit *SU) override;
1843
1844 void unscheduledNode(SUnit *SU) override;
1845
1846protected:
1847 bool canClobber(const SUnit *SU, const SUnit *Op);
1848 void AddPseudoTwoAddrDeps();
1849 void PrescheduleNodesWithMultipleUses();
1850 void CalculateSethiUllmanNumbers();
1851};
1852
1853template<class SF>
1854static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1855 unsigned BestIdx = 0;
1856 // Only compute the cost for the first 1000 items in the queue, to avoid
1857 // excessive compile-times for very large queues.
1858 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1859 I++)
1860 if (Picker(Q[BestIdx], Q[I]))
1861 BestIdx = I;
1862 SUnit *V = Q[BestIdx];
1863 if (BestIdx + 1 != Q.size())
1864 std::swap(Q[BestIdx], Q.back());
1865 Q.pop_back();
1866 return V;
1867}
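// Note: the SF comparison functors follow the convention documented for
// hybrid_ls_rr_sort::operator() below: Picker(A, B) returns true when B
// should be scheduled with higher priority than A, which is why the loop
// above replaces BestIdx whenever the comparison succeeds. The 1000-entry
// cap trades a potentially worse pick for bounded compile time.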
1868
1869template<class SF>
1870SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1871#ifndef NDEBUG
1872 if (DAG->StressSched) {
1873 reverse_sort<SF> RPicker(Picker);
1874 return popFromQueueImpl(Q, RPicker);
1875 }
1876#endif
1877 (void)DAG;
1878 return popFromQueueImpl(Q, Picker);
1879}
1880
1881//===----------------------------------------------------------------------===//
1882// RegReductionPriorityQueue Definition
1883//===----------------------------------------------------------------------===//
1884//
1885// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1886// to reduce register pressure.
1887//
1888template<class SF>
1889class RegReductionPriorityQueue : public RegReductionPQBase {
1890 SF Picker;
1891
1892public:
1893 RegReductionPriorityQueue(MachineFunction &mf,
1894 bool tracksrp,
1895 bool srcorder,
1896 const TargetInstrInfo *tii,
1897 const TargetRegisterInfo *tri,
1898 const TargetLowering *tli)
1899 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1900 tii, tri, tli),
1901 Picker(this) {}
1902
1903 bool isBottomUp() const override { return SF::IsBottomUp; }
1904
1905 bool isReady(SUnit *U) const override {
1906 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1907 }
1908
1909 SUnit *pop() override {
1910 if (Queue.empty()) return nullptr;
1911
1912 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1913 V->NodeQueueId = 0;
1914 return V;
1915 }
1916
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1919 // Emulate pop() without clobbering NodeQueueIds.
1920 std::vector<SUnit *> DumpQueue = Queue;
1921 SF DumpPicker = Picker;
1922 while (!DumpQueue.empty()) {
1923 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1924 dbgs() << "Height " << SU->getHeight() << ": ";
1925 DAG->dumpNode(*SU);
1926 }
1927 }
1928#endif
1929};
1930
1931using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1932using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1933using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1934using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1935
1936} // end anonymous namespace
1937
1938//===----------------------------------------------------------------------===//
1939// Static Node Priority for Register Pressure Reduction
1940//===----------------------------------------------------------------------===//
1941
1942// Check for special nodes that bypass scheduling heuristics.
1943// Currently this pushes TokenFactor nodes down, but may be used for other
1944// pseudo-ops as well.
1945//
1946// Return -1 to schedule right above left, 1 for left above right.
1947// Return 0 if no bias exists.
1948static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1949 bool LSchedLow = left->isScheduleLow;
1950 bool RSchedLow = right->isScheduleLow;
1951 if (LSchedLow != RSchedLow)
1952 return LSchedLow < RSchedLow ? 1 : -1;
1953 return 0;
1954}
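// For example, if only right is marked isScheduleLow, LSchedLow (false) is
// less than RSchedLow (true) and the function returns 1, i.e. left above
// right, so the schedule-low node ends up below the other one, consistent
// with pushing TokenFactor nodes down.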
1955
1956/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1957/// Smaller number is the higher priority.
1958static unsigned
1959CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1960 if (SUNumbers[SU->NodeNum] != 0)
1961 return SUNumbers[SU->NodeNum];
1962
1963 // Use WorkList to avoid stack overflow on excessively large IRs.
1964 struct WorkState {
1965 WorkState(const SUnit *SU) : SU(SU) {}
1966 const SUnit *SU;
1967 unsigned PredsProcessed = 0;
1968 };
1969
 1970   SmallVector<WorkState, 16> WorkList;
 1971   WorkList.push_back(SU);
1972 while (!WorkList.empty()) {
1973 auto &Temp = WorkList.back();
1974 auto *TempSU = Temp.SU;
1975 bool AllPredsKnown = true;
1976 // Try to find a non-evaluated pred and push it into the processing stack.
1977 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1978 auto &Pred = TempSU->Preds[P];
1979 if (Pred.isCtrl()) continue; // ignore chain preds
1980 SUnit *PredSU = Pred.getSUnit();
1981 if (SUNumbers[PredSU->NodeNum] == 0) {
1982#ifndef NDEBUG
1983 // In debug mode, check that we don't have such element in the stack.
1984 for (auto It : WorkList)
1985 assert(It.SU != PredSU && "Trying to push an element twice?");
1986#endif
1987 // Next time start processing this one starting from the next pred.
1988 Temp.PredsProcessed = P + 1;
1989 WorkList.push_back(PredSU);
1990 AllPredsKnown = false;
1991 break;
1992 }
1993 }
1994
1995 if (!AllPredsKnown)
1996 continue;
1997
1998 // Once all preds are known, we can calculate the answer for this one.
1999 unsigned SethiUllmanNumber = 0;
2000 unsigned Extra = 0;
2001 for (const SDep &Pred : TempSU->Preds) {
2002 if (Pred.isCtrl()) continue; // ignore chain preds
2003 SUnit *PredSU = Pred.getSUnit();
2004 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
2005 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2006 if (PredSethiUllman > SethiUllmanNumber) {
2007 SethiUllmanNumber = PredSethiUllman;
2008 Extra = 0;
2009 } else if (PredSethiUllman == SethiUllmanNumber)
2010 ++Extra;
2011 }
2012
2013 SethiUllmanNumber += Extra;
2014 if (SethiUllmanNumber == 0)
2015 SethiUllmanNumber = 1;
2016 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2017 WorkList.pop_back();
2018 }
2019
2020 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2021 return SUNumbers[SU->NodeNum];
2022}
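// Worked example (hypothetical nodes): two leaves A and B with no non-chain
// predecessors each receive the minimum number 1. For C = op(A, B) the
// maximum predecessor number is 1 with one tie, so Extra == 1 and C gets
// 1 + 1 = 2. A node whose predecessor numbers are 2 and 1 has no tie at the
// maximum and simply inherits 2.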
2023
2024/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2025/// scheduling units.
2026void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2027 SethiUllmanNumbers.assign(SUnits->size(), 0);
2028
2029 for (const SUnit &SU : *SUnits)
2030 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2031}
2032
2033void RegReductionPQBase::addNode(const SUnit *SU) {
2034 unsigned SUSize = SethiUllmanNumbers.size();
2035 if (SUnits->size() > SUSize)
2036 SethiUllmanNumbers.resize(SUSize*2, 0);
2037 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2038}
2039
2040void RegReductionPQBase::updateNode(const SUnit *SU) {
2041 SethiUllmanNumbers[SU->NodeNum] = 0;
2042 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2043}
2044
2045// Lower priority means schedule further down. For bottom-up scheduling, lower
2046// priority SUs are scheduled before higher priority SUs.
2047unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2048 assert(SU->NodeNum < SethiUllmanNumbers.size());
2049 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2050 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2051 // CopyToReg should be close to its uses to facilitate coalescing and
2052 // avoid spilling.
2053 return 0;
2054 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2055 Opc == TargetOpcode::SUBREG_TO_REG ||
2056 Opc == TargetOpcode::INSERT_SUBREG)
2057 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2058 // close to their uses to facilitate coalescing.
2059 return 0;
2060 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2061 // If SU does not have a register use, i.e. it doesn't produce a value
2062 // that would be consumed (e.g. store), then it terminates a chain of
2063 // computation. Give it a large SethiUllman number so it will be
 2064     // scheduled right before its predecessors so that it doesn't lengthen
2065 // their live ranges.
2066 return 0xffff;
2067 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2068 // If SU does not have a register def, schedule it close to its uses
2069 // because it does not lengthen any live ranges.
2070 return 0;
2071#if 1
2072 return SethiUllmanNumbers[SU->NodeNum];
2073#else
2074 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2075 if (SU->isCallOp) {
2076 // FIXME: This assumes all of the defs are used as call operands.
2077 int NP = (int)Priority - SU->getNode()->getNumValues();
2078 return (NP > 0) ? NP : 0;
2079 }
2080 return Priority;
2081#endif
2082}
2083
2084//===----------------------------------------------------------------------===//
2085// Register Pressure Tracking
2086//===----------------------------------------------------------------------===//
2087
2088#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2089LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2090 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2091 unsigned Id = RC->getID();
2092 unsigned RP = RegPressure[Id];
2093 if (!RP) continue;
2094 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2095 << RegLimit[Id] << '\n');
2096 }
2097}
2098#endif
2099
2100bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2101 if (!TLI)
2102 return false;
2103
2104 for (const SDep &Pred : SU->Preds) {
2105 if (Pred.isCtrl())
2106 continue;
2107 SUnit *PredSU = Pred.getSUnit();
2108 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2109 // to cover the number of registers defined (they are all live).
2110 if (PredSU->NumRegDefsLeft == 0) {
2111 continue;
2112 }
2113 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2114 RegDefPos.IsValid(); RegDefPos.Advance()) {
2115 unsigned RCId, Cost;
2116 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2117
2118 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2119 return true;
2120 }
2121 }
2122 return false;
2123}
2124
2125bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2126 const SDNode *N = SU->getNode();
2127
2128 if (!N->isMachineOpcode() || !SU->NumSuccs)
2129 return false;
2130
2131 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2132 for (unsigned i = 0; i != NumDefs; ++i) {
2133 MVT VT = N->getSimpleValueType(i);
2134 if (!N->hasAnyUseOfValue(i))
2135 continue;
2136 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2137 if (RegPressure[RCId] >= RegLimit[RCId])
2138 return true;
2139 }
2140 return false;
2141}
2142
 2143 // Compute the register pressure contribution of this instruction by counting
 2144 // up for uses that are not live and down for defs. Only count register classes
2145// that are already under high pressure. As a side effect, compute the number of
2146// uses of registers that are already live.
2147//
2148// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2149// so could probably be factored.
2150int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2151 LiveUses = 0;
2152 int PDiff = 0;
2153 for (const SDep &Pred : SU->Preds) {
2154 if (Pred.isCtrl())
2155 continue;
2156 SUnit *PredSU = Pred.getSUnit();
2157 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2158 // to cover the number of registers defined (they are all live).
2159 if (PredSU->NumRegDefsLeft == 0) {
2160 if (PredSU->getNode()->isMachineOpcode())
2161 ++LiveUses;
2162 continue;
2163 }
2164 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2165 RegDefPos.IsValid(); RegDefPos.Advance()) {
2166 MVT VT = RegDefPos.GetValue();
2167 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2168 if (RegPressure[RCId] >= RegLimit[RCId])
2169 ++PDiff;
2170 }
2171 }
2172 const SDNode *N = SU->getNode();
2173
2174 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2175 return PDiff;
2176
2177 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2178 for (unsigned i = 0; i != NumDefs; ++i) {
2179 MVT VT = N->getSimpleValueType(i);
2180 if (!N->hasAnyUseOfValue(i))
2181 continue;
2182 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2183 if (RegPressure[RCId] >= RegLimit[RCId])
2184 --PDiff;
2185 }
2186 return PDiff;
2187}
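// Roughly speaking, PDiff counts +1 for each not-yet-live register def of a
// data predecessor that lands in an over-limit class and -1 for each used
// def of SU itself in such a class, so a positive value means scheduling SU
// tends to raise pressure where it is already high. LiveUses counts data
// operands whose defining node has no register defs left outstanding, i.e.
// values that are already live.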
2188
2189void RegReductionPQBase::scheduledNode(SUnit *SU) {
2190 if (!TracksRegPressure)
2191 return;
2192
2193 if (!SU->getNode())
2194 return;
2195
2196 for (const SDep &Pred : SU->Preds) {
2197 if (Pred.isCtrl())
2198 continue;
2199 SUnit *PredSU = Pred.getSUnit();
2200 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2201 // to cover the number of registers defined (they are all live).
2202 if (PredSU->NumRegDefsLeft == 0) {
2203 continue;
2204 }
2205 // FIXME: The ScheduleDAG currently loses information about which of a
2206 // node's values is consumed by each dependence. Consequently, if the node
2207 // defines multiple register classes, we don't know which to pressurize
2208 // here. Instead the following loop consumes the register defs in an
2209 // arbitrary order. At least it handles the common case of clustered loads
2210 // to the same class. For precise liveness, each SDep needs to indicate the
2211 // result number. But that tightly couples the ScheduleDAG with the
2212 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2213 // value type or register class to SDep.
2214 //
2215 // The most important aspect of register tracking is balancing the increase
2216 // here with the reduction further below. Note that this SU may use multiple
 2217     // defs in PredSU. They can't be determined here, but we've already
2218 // compensated by reducing NumRegDefsLeft in PredSU during
2219 // ScheduleDAGSDNodes::AddSchedEdges.
2220 --PredSU->NumRegDefsLeft;
2221 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2222 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2223 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2224 if (SkipRegDefs)
2225 continue;
2226
2227 unsigned RCId, Cost;
2228 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2229 RegPressure[RCId] += Cost;
2230 break;
2231 }
2232 }
2233
2234 // We should have this assert, but there may be dead SDNodes that never
2235 // materialize as SUnits, so they don't appear to generate liveness.
2236 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2237 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2238 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2239 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2240 if (SkipRegDefs > 0)
2241 continue;
2242 unsigned RCId, Cost;
2243 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2244 if (RegPressure[RCId] < Cost) {
2245 // Register pressure tracking is imprecise. This can happen. But we try
2246 // hard not to let it happen because it likely results in poor scheduling.
2247 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2248 << ") has too many regdefs\n");
2249 RegPressure[RCId] = 0;
2250 }
2251 else {
2252 RegPressure[RCId] -= Cost;
2253 }
2254 }
2255 LLVM_DEBUG(dumpRegPressure());
2256}
2257
2258void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2259 if (!TracksRegPressure)
2260 return;
2261
2262 const SDNode *N = SU->getNode();
2263 if (!N) return;
2264
2265 if (!N->isMachineOpcode()) {
2266 if (N->getOpcode() != ISD::CopyToReg)
2267 return;
2268 } else {
2269 unsigned Opc = N->getMachineOpcode();
2270 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2271 Opc == TargetOpcode::INSERT_SUBREG ||
2272 Opc == TargetOpcode::SUBREG_TO_REG ||
2273 Opc == TargetOpcode::REG_SEQUENCE ||
2274 Opc == TargetOpcode::IMPLICIT_DEF)
2275 return;
2276 }
2277
2278 for (const SDep &Pred : SU->Preds) {
2279 if (Pred.isCtrl())
2280 continue;
2281 SUnit *PredSU = Pred.getSUnit();
2282 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2283 // counts data deps.
2284 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2285 continue;
2286 const SDNode *PN = PredSU->getNode();
2287 if (!PN->isMachineOpcode()) {
2288 if (PN->getOpcode() == ISD::CopyFromReg) {
2289 MVT VT = PN->getSimpleValueType(0);
2290 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2291 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2292 }
2293 continue;
2294 }
2295 unsigned POpc = PN->getMachineOpcode();
2296 if (POpc == TargetOpcode::IMPLICIT_DEF)
2297 continue;
2298 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2299 POpc == TargetOpcode::INSERT_SUBREG ||
2300 POpc == TargetOpcode::SUBREG_TO_REG) {
2301 MVT VT = PN->getSimpleValueType(0);
2302 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2303 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2304 continue;
2305 }
2306 if (POpc == TargetOpcode::REG_SEQUENCE) {
2307 unsigned DstRCIdx =
2308 cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
2309 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2310 unsigned RCId = RC->getID();
2311 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2312 // here. Instead use the same constant as in GetCostForDef.
 2313       RegPressure[RCId] += RegSequenceCost;
 2314       continue;
2315 }
2316 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2317 for (unsigned i = 0; i != NumDefs; ++i) {
2318 MVT VT = PN->getSimpleValueType(i);
2319 if (!PN->hasAnyUseOfValue(i))
2320 continue;
2321 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2322 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2323 // Register pressure tracking is imprecise. This can happen.
2324 RegPressure[RCId] = 0;
2325 else
2326 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2327 }
2328 }
2329
2330 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2331 // may transfer data dependencies to CopyToReg.
2332 if (SU->NumSuccs && N->isMachineOpcode()) {
2333 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2334 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2335 MVT VT = N->getSimpleValueType(i);
2336 if (VT == MVT::Glue || VT == MVT::Other)
2337 continue;
2338 if (!N->hasAnyUseOfValue(i))
2339 continue;
2340 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2341 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2342 }
2343 }
2344
2345 LLVM_DEBUG(dumpRegPressure());
2346}
2347
2348//===----------------------------------------------------------------------===//
2349// Dynamic Node Priority for Register Pressure Reduction
2350//===----------------------------------------------------------------------===//
2351
2352/// closestSucc - Returns the scheduled cycle of the successor which is
2353/// closest to the current cycle.
2354static unsigned closestSucc(const SUnit *SU) {
2355 unsigned MaxHeight = 0;
2356 for (const SDep &Succ : SU->Succs) {
2357 if (Succ.isCtrl()) continue; // ignore chain succs
2358 unsigned Height = Succ.getSUnit()->getHeight();
 2359     // If there are a bunch of CopyToRegs stacked up, they should be considered
2360 // to be at the same position.
2361 if (Succ.getSUnit()->getNode() &&
2362 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2363 Height = closestSucc(Succ.getSUnit())+1;
2364 if (Height > MaxHeight)
2365 MaxHeight = Height;
2366 }
2367 return MaxHeight;
2368}
2369
 2370 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2371/// for scratch registers, i.e. number of data dependencies.
2372static unsigned calcMaxScratches(const SUnit *SU) {
2373 unsigned Scratches = 0;
2374 for (const SDep &Pred : SU->Preds) {
2375 if (Pred.isCtrl()) continue; // ignore chain preds
2376 Scratches++;
2377 }
2378 return Scratches;
2379}
2380
2381/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2382/// CopyFromReg from a virtual register.
2383static bool hasOnlyLiveInOpers(const SUnit *SU) {
2384 bool RetVal = false;
2385 for (const SDep &Pred : SU->Preds) {
2386 if (Pred.isCtrl()) continue;
2387 const SUnit *PredSU = Pred.getSUnit();
2388 if (PredSU->getNode() &&
2389 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2390 Register Reg =
2391 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2392 if (Reg.isVirtual()) {
2393 RetVal = true;
2394 continue;
2395 }
2396 }
2397 return false;
2398 }
2399 return RetVal;
2400}
2401
2402/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2403/// CopyToReg to a virtual register. This SU def is probably a liveout and
2404/// it has no other use. It should be scheduled closer to the terminator.
2405static bool hasOnlyLiveOutUses(const SUnit *SU) {
2406 bool RetVal = false;
2407 for (const SDep &Succ : SU->Succs) {
2408 if (Succ.isCtrl()) continue;
2409 const SUnit *SuccSU = Succ.getSUnit();
2410 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2411 Register Reg =
2412 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2413 if (Reg.isVirtual()) {
2414 RetVal = true;
2415 continue;
2416 }
2417 }
2418 return false;
2419 }
2420 return RetVal;
2421}
2422
2423// Set isVRegCycle for a node with only live in opers and live out uses. Also
2424// set isVRegCycle for its CopyFromReg operands.
2425//
2426// This is only relevant for single-block loops, in which case the VRegCycle
2427// node is likely an induction variable in which the operand and target virtual
2428// registers should be coalesced (e.g. pre/post increment values). Setting the
2429// isVRegCycle flag helps the scheduler prioritize other uses of the same
2430// CopyFromReg so that this node becomes the virtual register "kill". This
2431// avoids interference between the values live in and out of the block and
2432// eliminates a copy inside the loop.
2433static void initVRegCycle(SUnit *SU) {
 2434   if (DisableSchedVRegCycle)
 2435     return;
2436
2437 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2438 return;
2439
2440 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2441
2442 SU->isVRegCycle = true;
2443
2444 for (const SDep &Pred : SU->Preds) {
2445 if (Pred.isCtrl()) continue;
2446 Pred.getSUnit()->isVRegCycle = true;
2447 }
2448}
2449
2450// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2451// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2452static void resetVRegCycle(SUnit *SU) {
2453 if (!SU->isVRegCycle)
2454 return;
2455
2456 for (const SDep &Pred : SU->Preds) {
2457 if (Pred.isCtrl()) continue; // ignore chain preds
2458 SUnit *PredSU = Pred.getSUnit();
2459 if (PredSU->isVRegCycle) {
2460 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2461 "VRegCycle def must be CopyFromReg");
2462 Pred.getSUnit()->isVRegCycle = false;
2463 }
2464 }
2465}
2466
2467// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2468// means a node that defines the VRegCycle has not been scheduled yet.
2469static bool hasVRegCycleUse(const SUnit *SU) {
2470 // If this SU also defines the VReg, don't hoist it as a "use".
2471 if (SU->isVRegCycle)
2472 return false;
2473
2474 for (const SDep &Pred : SU->Preds) {
2475 if (Pred.isCtrl()) continue; // ignore chain preds
2476 if (Pred.getSUnit()->isVRegCycle &&
2477 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2478 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2479 return true;
2480 }
2481 }
2482 return false;
2483}
2484
2485// Check for either a dependence (latency) or resource (hazard) stall.
2486//
2487// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2488static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2489 if ((int)SPQ->getCurCycle() < Height) return true;
2490 if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2491       != ScheduleHazardRecognizer::NoHazard)
 2492     return true;
2493 return false;
2494}
2495
2496// Return -1 if left has higher priority, 1 if right has higher priority.
2497// Return 0 if latency-based priority is equivalent.
2498static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2499 RegReductionPQBase *SPQ) {
2500 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2501 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2502 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2503 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2504 int LHeight = (int)left->getHeight() + LPenalty;
2505 int RHeight = (int)right->getHeight() + RPenalty;
2506
2507 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2508 BUHasStall(left, LHeight, SPQ);
2509 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2510 BUHasStall(right, RHeight, SPQ);
2511
 2512   // If scheduling one of the nodes will cause a pipeline stall, delay it.
 2513   // If scheduling both of them will cause pipeline stalls, sort them
 2514   // according to their height.
2515 if (LStall) {
2516 if (!RStall)
2517 return 1;
2518 if (LHeight != RHeight)
2519 return LHeight > RHeight ? 1 : -1;
2520 } else if (RStall)
2521 return -1;
2522
2523 // If either node is scheduling for latency, sort them by height/depth
2524 // and latency.
2525 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2526 right->SchedulingPref == Sched::ILP)) {
 2527     // If neither instruction stalls (!LStall && !RStall) and the hazard
 2528     // recognizer is enabled, it is already grouping instructions by cycle, so
 2529     // height is covered and only depth matters. We also reach this point if both stall
2530 // but have the same height.
2531 if (!SPQ->getHazardRec()->isEnabled()) {
2532 if (LHeight != RHeight)
2533 return LHeight > RHeight ? 1 : -1;
2534 }
2535 int LDepth = left->getDepth() - LPenalty;
2536 int RDepth = right->getDepth() - RPenalty;
2537 if (LDepth != RDepth) {
2538 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2539 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2540 << ") depth " << RDepth << "\n");
2541 return LDepth < RDepth ? 1 : -1;
2542 }
2543 if (left->Latency != right->Latency)
2544 return left->Latency > right->Latency ? 1 : -1;
2545 }
2546 return 0;
2547}
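// For example, when the stall checks apply and CurCycle == 3, a node of
// penalty-adjusted height 5 stalls (3 < 5) while a node of height 2 does not
// (assuming no hazard), so the non-stalling node gets priority; if both
// stall, the node with the smaller adjusted height is preferred and the
// taller one is deferred to a later cycle.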
2548
2549static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2550 // Schedule physical register definitions close to their use. This is
2551 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2552 // long as shortening physreg live ranges is generally good, we can defer
2553 // creating a subtarget hook.
 2554   if (!DisableSchedPhysRegJoin) {
 2555     bool LHasPhysReg = left->hasPhysRegDefs;
2556 bool RHasPhysReg = right->hasPhysRegDefs;
2557 if (LHasPhysReg != RHasPhysReg) {
2558 #ifndef NDEBUG
2559 static const char *const PhysRegMsg[] = { " has no physreg",
2560 " defines a physreg" };
2561 #endif
2562 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2563 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2564 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2565 return LHasPhysReg < RHasPhysReg;
2566 }
2567 }
2568
 2569   // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2570 unsigned LPriority = SPQ->getNodePriority(left);
2571 unsigned RPriority = SPQ->getNodePriority(right);
2572
2573 // Be really careful about hoisting call operands above previous calls.
 2574   // Only allow it if it would reduce register pressure.
2575 if (left->isCall && right->isCallOp) {
2576 unsigned RNumVals = right->getNode()->getNumValues();
2577 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2578 }
2579 if (right->isCall && left->isCallOp) {
2580 unsigned LNumVals = left->getNode()->getNumValues();
2581 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2582 }
2583
2584 if (LPriority != RPriority)
2585 return LPriority > RPriority;
2586
 2587   // If one or both of the nodes are calls and their Sethi-Ullman numbers are
 2588   // the same, then keep source order.
2589 if (left->isCall || right->isCall) {
2590 unsigned LOrder = SPQ->getNodeOrdering(left);
2591 unsigned ROrder = SPQ->getNodeOrdering(right);
2592
2593 // Prefer an ordering where the lower the non-zero order number, the higher
2594 // the preference.
2595 if ((LOrder || ROrder) && LOrder != ROrder)
2596 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2597 }
2598
 2599   // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2600 // e.g.
2601 // t1 = op t2, c1
2602 // t3 = op t4, c2
2603 //
2604 // and the following instructions are both ready.
2605 // t2 = op c3
2606 // t4 = op c4
2607 //
2608 // Then schedule t2 = op first.
2609 // i.e.
2610 // t4 = op c4
2611 // t2 = op c3
2612 // t1 = op t2, c1
2613 // t3 = op t4, c2
2614 //
2615 // This creates more short live intervals.
2616 unsigned LDist = closestSucc(left);
2617 unsigned RDist = closestSucc(right);
2618 if (LDist != RDist)
2619 return LDist < RDist;
2620
 2621   // How many registers become live when the node is scheduled.
2622 unsigned LScratch = calcMaxScratches(left);
2623 unsigned RScratch = calcMaxScratches(right);
2624 if (LScratch != RScratch)
2625 return LScratch > RScratch;
2626
2627 // Comparing latency against a call makes little sense unless the node
2628 // is register pressure-neutral.
2629 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2630 return (left->NodeQueueId > right->NodeQueueId);
2631
2632 // Do not compare latencies when one or both of the nodes are calls.
2633 if (!DisableSchedCycles &&
2634 !(left->isCall || right->isCall)) {
2635 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2636 if (result != 0)
2637 return result > 0;
2638 }
2639 else {
2640 if (left->getHeight() != right->getHeight())
2641 return left->getHeight() > right->getHeight();
2642
2643 if (left->getDepth() != right->getDepth())
2644 return left->getDepth() < right->getDepth();
2645 }
2646
2647 assert(left->NodeQueueId && right->NodeQueueId &&
2648 "NodeQueueId cannot be zero");
2649 return (left->NodeQueueId > right->NodeQueueId);
2650}
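// In summary, BURRSort falls through these criteria in order: physical
// register defs (kept near their uses), Sethi-Ullman priority (reduced for
// call operands hoisted above calls), source order among calls, distance to
// the closest successor, scratch (data-operand) count, latency/height/depth,
// and finally NodeQueueId as the tie-breaker.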
2651
2652// Bottom up
2653bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2654 if (int res = checkSpecialNodes(left, right))
2655 return res > 0;
2656
2657 return BURRSort(left, right, SPQ);
2658}
2659
2660// Source order, otherwise bottom up.
2661bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2662 if (int res = checkSpecialNodes(left, right))
2663 return res > 0;
2664
2665 unsigned LOrder = SPQ->getNodeOrdering(left);
2666 unsigned ROrder = SPQ->getNodeOrdering(right);
2667
2668 // Prefer an ordering where the lower the non-zero order number, the higher
2669 // the preference.
2670 if ((LOrder || ROrder) && LOrder != ROrder)
2671 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2672
2673 return BURRSort(left, right, SPQ);
2674}
2675
2676// If the time between now and when the instruction will be ready can cover
2677// the spill code, then avoid adding it to the ready queue. This gives long
2678// stalls highest priority and allows hoisting across calls. It should also
2679// speed up processing the available queue.
2680bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2681 static const unsigned ReadyDelay = 3;
2682
2683 if (SPQ->MayReduceRegPressure(SU)) return true;
2684
2685 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2686
2687 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
 2688       != ScheduleHazardRecognizer::NoHazard)
 2689     return false;
2690
2691 return true;
2692}
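// For example, with ReadyDelay == 3 and CurCycle == 4, a node of height 8 is
// held back (8 > 4 + 3) unless it may reduce register pressure, while a node
// of height 6 is admitted provided the hazard recognizer reports no hazard
// three cycles ahead.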
2693
2694// Return true if right should be scheduled with higher priority than left.
2695bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2696 if (int res = checkSpecialNodes(left, right))
2697 return res > 0;
2698
2699 if (left->isCall || right->isCall)
2700 // No way to compute latency of calls.
2701 return BURRSort(left, right, SPQ);
2702
2703 bool LHigh = SPQ->HighRegPressure(left);
2704 bool RHigh = SPQ->HighRegPressure(right);
2705 // Avoid causing spills. If register pressure is high, schedule for
2706 // register pressure reduction.
2707 if (LHigh && !RHigh) {
2708 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2709 << right->NodeNum << ")\n");
2710 return true;
2711 }
2712 else if (!LHigh && RHigh) {
2713 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2714 << left->NodeNum << ")\n");
2715 return false;
2716 }
2717 if (!LHigh && !RHigh) {
2718 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2719 if (result != 0)
2720 return result > 0;
2721 }
2722 return BURRSort(left, right, SPQ);
2723}
2724
2725// Schedule as many instructions in each cycle as possible. So don't make an
2726// instruction available unless it is ready in the current cycle.
2727bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2728 if (SU->getHeight() > CurCycle) return false;
2729
2730 if (SPQ->getHazardRec()->getHazardType(SU, 0)
 2731       != ScheduleHazardRecognizer::NoHazard)
 2732     return false;
2733
2734 return true;
2735}
2736
2737static bool canEnableCoalescing(SUnit *SU) {
2738 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2739 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2740 // CopyToReg should be close to its uses to facilitate coalescing and
2741 // avoid spilling.
2742 return true;
2743
2744 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2745 Opc == TargetOpcode::SUBREG_TO_REG ||
2746 Opc == TargetOpcode::INSERT_SUBREG)
2747 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2748 // close to their uses to facilitate coalescing.
2749 return true;
2750
2751 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2752 // If SU does not have a register def, schedule it close to its uses
2753 // because it does not lengthen any live ranges.
2754 return true;
2755
2756 return false;
2757}
2758
2759// list-ilp is currently an experimental scheduler that allows various
2760// heuristics to be enabled prior to the normal register reduction logic.
2761bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2762 if (int res = checkSpecialNodes(left, right))
2763 return res > 0;
2764
2765 if (left->isCall || right->isCall)
2766 // No way to compute latency of calls.
2767 return BURRSort(left, right, SPQ);
2768
2769 unsigned LLiveUses = 0, RLiveUses = 0;
2770 int LPDiff = 0, RPDiff = 0;
 2771   if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
 2772     LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2773 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2774 }
2775 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2776 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2777 << "): " << LPDiff << " != SU(" << right->NodeNum
2778 << "): " << RPDiff << "\n");
2779 return LPDiff > RPDiff;
2780 }
2781
2782 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2783 bool LReduce = canEnableCoalescing(left);
2784 bool RReduce = canEnableCoalescing(right);
2785 if (LReduce && !RReduce) return false;
2786 if (RReduce && !LReduce) return true;
2787 }
2788
2789 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2790 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2791 << " != SU(" << right->NodeNum << "): " << RLiveUses
2792 << "\n");
2793 return LLiveUses < RLiveUses;
2794 }
2795
2796 if (!DisableSchedStalls) {
2797 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2798 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2799 if (LStall != RStall)
2800 return left->getHeight() > right->getHeight();
2801 }
2802
 2803   if (!DisableSchedCriticalPath) {
 2804     int spread = (int)left->getDepth() - (int)right->getDepth();
2805 if (std::abs(spread) > MaxReorderWindow) {
2806 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2807 << left->getDepth() << " != SU(" << right->NodeNum
2808 << "): " << right->getDepth() << "\n");
2809 return left->getDepth() < right->getDepth();
2810 }
2811 }
2812
2813 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2814 int spread = (int)left->getHeight() - (int)right->getHeight();
2815 if (std::abs(spread) > MaxReorderWindow)
2816 return left->getHeight() > right->getHeight();
2817 }
2818
2819 return BURRSort(left, right, SPQ);
2820}
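// In summary, when the corresponding Disable* flags are off, the comparison
// above considers, in order: register pressure difference, coalescing
// opportunity under pressure, live-use count, stalls, and then depth and
// height (each only when the difference exceeds MaxReorderWindow), before
// deferring to BURRSort.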
2821
2822void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2823 SUnits = &sunits;
2824 // Add pseudo dependency edges for two-address nodes.
2825 if (!Disable2AddrHack)
2826 AddPseudoTwoAddrDeps();
2827 // Reroute edges to nodes with multiple uses.
2828 if (!TracksRegPressure && !SrcOrder)
2829 PrescheduleNodesWithMultipleUses();
2830 // Calculate node priorities.
2831 CalculateSethiUllmanNumbers();
2832
2833 // For single block loops, mark nodes that look like canonical IV increments.
2834 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2835 for (SUnit &SU : sunits)
2836 initVRegCycle(&SU);
2837}
2838
2839//===----------------------------------------------------------------------===//
2840// Preschedule for Register Pressure
2841//===----------------------------------------------------------------------===//
2842
2843bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2844 if (SU->isTwoAddress) {
2845 unsigned Opc = SU->getNode()->getMachineOpcode();
2846 const MCInstrDesc &MCID = TII->get(Opc);
2847 unsigned NumRes = MCID.getNumDefs();
2848 unsigned NumOps = MCID.getNumOperands() - NumRes;
2849 for (unsigned i = 0; i != NumOps; ++i) {
2850 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2851 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2852 if (DU->getNodeId() != -1 &&
2853 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2854 return true;
2855 }
2856 }
2857 }
2858 return false;
2859}
2860
 2861 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2862/// successor's explicit physregs whose definition can reach DepSU.
2863/// i.e. DepSU should not be scheduled above SU.
2864static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2865 ScheduleDAGRRList *scheduleDAG,
2866 const TargetInstrInfo *TII,
2867 const TargetRegisterInfo *TRI) {
2868 ArrayRef<MCPhysReg> ImpDefs =
2869 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2870 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2871 if (ImpDefs.empty() && !RegMask)
2872 return false;
2873
2874 for (const SDep &Succ : SU->Succs) {
2875 SUnit *SuccSU = Succ.getSUnit();
2876 for (const SDep &SuccPred : SuccSU->Preds) {
2877 if (!SuccPred.isAssignedRegDep())
2878 continue;
2879
2880 if (RegMask &&
2881 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2882 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2883 return true;
2884
2885 for (MCPhysReg ImpDef : ImpDefs) {
2886 // Return true if SU clobbers this physical register use and the
2887 // definition of the register reaches from DepSU. IsReachable queries
2888 // a topological forward sort of the DAG (following the successors).
2889 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2890 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2891 return true;
2892 }
2893 }
2894 }
2895 return false;
2896}
2897
2898/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2899/// physical register defs.
2900static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2901 const TargetInstrInfo *TII,
2902 const TargetRegisterInfo *TRI) {
2903 SDNode *N = SuccSU->getNode();
2904 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2905 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2906 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2907 for (const SDNode *SUNode = SU->getNode(); SUNode;
2908 SUNode = SUNode->getGluedNode()) {
2909 if (!SUNode->isMachineOpcode())
2910 continue;
2911 ArrayRef<MCPhysReg> SUImpDefs =
2912 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2913 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2914 if (SUImpDefs.empty() && !SURegMask)
2915 continue;
2916 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2917 MVT VT = N->getSimpleValueType(i);
2918 if (VT == MVT::Glue || VT == MVT::Other)
2919 continue;
2920 if (!N->hasAnyUseOfValue(i))
2921 continue;
2922 MCPhysReg Reg = ImpDefs[i - NumDefs];
2923 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2924 return true;
2925 for (MCPhysReg SUReg : SUImpDefs) {
2926 if (TRI->regsOverlap(Reg, SUReg))
2927 return true;
2928 }
2929 }
2930 }
2931 return false;
2932}
2933
2934/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2935/// are not handled well by the general register pressure reduction
2936/// heuristics. When presented with code like this:
2937///
2938/// N
2939/// / |
2940/// / |
2941/// U store
2942/// |
2943/// ...
2944///
2945/// the heuristics tend to push the store up, but since the
2946/// operand of the store has another use (U), this would increase
2947/// the length of that other use (the U->N edge).
2948///
2949/// This function transforms code like the above to route U's
2950/// dependence through the store when possible, like this:
2951///
2952/// N
2953/// ||
2954/// ||
2955/// store
2956/// |
2957/// U
2958/// |
2959/// ...
2960///
2961/// This results in the store being scheduled immediately
2962/// after N, which shortens the U->N live range, reducing
2963/// register pressure.
2964void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2965 // Visit all the nodes in topological order, working top-down.
2966 for (SUnit &SU : *SUnits) {
2967 // For now, only look at nodes with no data successors, such as stores.
2968 // These are especially important, due to the heuristics in
2969 // getNodePriority for nodes with no data successors.
2970 if (SU.NumSuccs != 0)
2971 continue;
2972 // For now, only look at nodes with exactly one data predecessor.
2973 if (SU.NumPreds != 1)
2974 continue;
2975 // Avoid prescheduling copies to virtual registers, which don't behave
2976 // like other nodes from the perspective of scheduling heuristics.
2977 if (SDNode *N = SU.getNode())
2978 if (N->getOpcode() == ISD::CopyToReg &&
2979 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2980 continue;
2981
2982 SDNode *PredFrameSetup = nullptr;
2983 for (const SDep &Pred : SU.Preds)
2984 if (Pred.isCtrl() && Pred.getSUnit()) {
 2985         // Find a predecessor that is not a data dependence.
2986 SDNode *PredND = Pred.getSUnit()->getNode();
2987
 2988         // If PredND is a FrameSetup, we should not pre-schedule the node,
 2989         // or else, when scheduling bottom-up, ADJCALLSTACKDOWN and
 2990         // ADJCALLSTACKUP may hold CallResource for too long and prevent
 2991         // other calls from being scheduled. If there is no other available
 2992         // node to schedule, the scheduler will try to rename the register
 2993         // by creating a copy to avoid the conflict, which will fail because
 2994         // CallResource is not a real physical register.
2995 if (PredND && PredND->isMachineOpcode() &&
2996 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2997 PredFrameSetup = PredND;
2998 break;
2999 }
3000 }
 3001     // Skip the node if it has a FrameSetup parent.
3002 if (PredFrameSetup != nullptr)
3003 continue;
3004
3005 // Locate the single data predecessor.
3006 SUnit *PredSU = nullptr;
3007 for (const SDep &Pred : SU.Preds)
3008 if (!Pred.isCtrl()) {
3009 PredSU = Pred.getSUnit();
3010 break;
3011 }
3012 assert(PredSU);
3013
3014 // Don't rewrite edges that carry physregs, because that requires additional
3015 // support infrastructure.
3016 if (PredSU->hasPhysRegDefs)
3017 continue;
3018 // Short-circuit the case where SU is PredSU's only data successor.
3019 if (PredSU->NumSuccs == 1)
3020 continue;
3021 // Avoid prescheduling to copies from virtual registers, which don't behave
3022 // like other nodes from the perspective of scheduling heuristics.
3023 if (SDNode *N = SU.getNode())
3024 if (N->getOpcode() == ISD::CopyFromReg &&
3025 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
3026 continue;
3027
3028 // Perform checks on the successors of PredSU.
3029 for (const SDep &PredSucc : PredSU->Succs) {
3030 SUnit *PredSuccSU = PredSucc.getSUnit();
3031 if (PredSuccSU == &SU) continue;
3032 // If PredSU has another successor with no data successors, for
3033 // now don't attempt to choose either over the other.
3034 if (PredSuccSU->NumSuccs == 0)
3035 goto outer_loop_continue;
3036 // Don't break physical register dependencies.
3037 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3038 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3039 goto outer_loop_continue;
3040 // Don't introduce graph cycles.
3041 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3042 goto outer_loop_continue;
3043 }
3044
3045 // Ok, the transformation is safe and the heuristics suggest it is
3046 // profitable. Update the graph.
3047 LLVM_DEBUG(
3048 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3049 << PredSU->NodeNum
3050 << " to guide scheduling in the presence of multiple uses\n");
3051 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3052 SDep Edge = PredSU->Succs[i];
3053 assert(!Edge.isAssignedRegDep());
3054 SUnit *SuccSU = Edge.getSUnit();
3055 if (SuccSU != &SU) {
3056 Edge.setSUnit(PredSU);
3057 scheduleDAG->RemovePred(SuccSU, Edge);
3058 scheduleDAG->AddPredQueued(&SU, Edge);
3059 Edge.setSUnit(&SU);
3060 scheduleDAG->AddPredQueued(SuccSU, Edge);
3061 --i;
3062 }
3063 }
3064 outer_loop_continue:;
3065 }
3066}
3067
 3068 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
 3069 /// it as a def&use operand, add a pseudo control edge from it to the other
 3070 /// node (if it won't create a cycle) so the two-address one will be scheduled
3071/// first (lower in the schedule). If both nodes are two-address, favor the
3072/// one that has a CopyToReg use (more likely to be a loop induction update).
3073/// If both are two-address, but one is commutable while the other is not
3074/// commutable, favor the one that's not commutable.
3075void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3076 for (SUnit &SU : *SUnits) {
3077 if (!SU.isTwoAddress)
3078 continue;
3079
3080 SDNode *Node = SU.getNode();
3081 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3082 continue;
3083
3084 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3085 unsigned Opc = Node->getMachineOpcode();
3086 const MCInstrDesc &MCID = TII->get(Opc);
3087 unsigned NumRes = MCID.getNumDefs();
3088 unsigned NumOps = MCID.getNumOperands() - NumRes;
3089 for (unsigned j = 0; j != NumOps; ++j) {
3090 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3091 continue;
3092 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3093 if (DU->getNodeId() == -1)
3094 continue;
3095 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3096 if (!DUSU)
3097 continue;
3098 for (const SDep &Succ : DUSU->Succs) {
3099 if (Succ.isCtrl())
3100 continue;
3101 SUnit *SuccSU = Succ.getSUnit();
3102 if (SuccSU == &SU)
3103 continue;
3104 // Be conservative. Ignore if nodes aren't at roughly the same
3105 // depth and height.
3106 if (SuccSU->getHeight() < SU.getHeight() &&
3107 (SU.getHeight() - SuccSU->getHeight()) > 1)
3108 continue;
3109 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3110 // constrains whatever is using the copy, instead of the copy
3111 // itself. In the case that the copy is coalesced, this
 3112         // preserves the intent of the pseudo two-address heuristics.
3113 while (SuccSU->Succs.size() == 1 &&
3114 SuccSU->getNode()->isMachineOpcode() &&
3115 SuccSU->getNode()->getMachineOpcode() ==
3116 TargetOpcode::COPY_TO_REGCLASS)
3117 SuccSU = SuccSU->Succs.front().getSUnit();
3118 // Don't constrain non-instruction nodes.
3119 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3120 continue;
3121 // Don't constrain nodes with physical register defs if the
3122 // predecessor can clobber them.
3123 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3124 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3125 continue;
3126 }
3127 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3128 // these may be coalesced away. We want them close to their uses.
3129 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3130 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3131 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3132 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3133 continue;
3134 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3135 (!canClobber(SuccSU, DUSU) ||
3136 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3137 (!SU.isCommutable && SuccSU->isCommutable)) &&
3138 !scheduleDAG->IsReachable(SuccSU, &SU)) {
 3139         LLVM_DEBUG(dbgs()
 3140                    << " Adding a pseudo-two-addr edge from SU #"
3141 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3142 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3143 }
3144 }
3145 }
3146 }
3147}
3148
3149//===----------------------------------------------------------------------===//
3150// Public Constructor Functions
3151//===----------------------------------------------------------------------===//
3152
 3153 ScheduleDAGSDNodes *
 3154 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
 3155                                  CodeGenOpt::Level OptLevel) {
3156 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3157 const TargetInstrInfo *TII = STI.getInstrInfo();
3158 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3159
3160 BURegReductionPriorityQueue *PQ =
3161 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3162 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3163 PQ->setScheduleDAG(SD);
3164 return SD;
3165}
3166
 3167 ScheduleDAGSDNodes *
 3168 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
 3169                                    CodeGenOpt::Level OptLevel) {
3170 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3171 const TargetInstrInfo *TII = STI.getInstrInfo();
3172 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3173
3174 SrcRegReductionPriorityQueue *PQ =
3175 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3176 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3177 PQ->setScheduleDAG(SD);
3178 return SD;
3179}
3180
 3181 ScheduleDAGSDNodes *
 3182 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
 3183                                    CodeGenOpt::Level OptLevel) {
3184 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3185 const TargetInstrInfo *TII = STI.getInstrInfo();
3186 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3187 const TargetLowering *TLI = IS->TLI;
3188
3189 HybridBURRPriorityQueue *PQ =
3190 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3191
3192 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3193 PQ->setScheduleDAG(SD);
3194 return SD;
3195}
3196
 3197 ScheduleDAGSDNodes *
 3198 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
 3199                                 CodeGenOpt::Level OptLevel) {
3200 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3201 const TargetInstrInfo *TII = STI.getInstrInfo();
3202 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3203 const TargetLowering *TLI = IS->TLI;
3204
3205 ILPBURRPriorityQueue *PQ =
3206 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3207 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3208 PQ->setScheduleDAG(SD);
3209 return SD;
3210}
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists."))
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
@ Flags
Definition: TextStubV5.cpp:93
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
static bool isClobberKind(unsigned Flag)
Definition: InlineAsm.h:306
static bool isRegDefKind(unsigned Flag)
Definition: InlineAsm.h:299
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:363
static bool isRegDefEarlyClobberKind(unsigned Flag)
Definition: InlineAsm.h:303
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:580
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:481
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:483
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
Definition: ScheduleDAG.h:267
unsigned NumPreds
Definition: ScheduleDAG.h:266
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:265
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:281
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:276
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:302
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:439
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
unsigned short NumRegDefsLeft
Definition: ScheduleDAG.h:272
bool isPending
True once pending.
Definition: ScheduleDAG.h:282
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isAvailable
True once available.
Definition: ScheduleDAG.h:283
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:286
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:290
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:304
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
Definition: ScheduleDAG.h:355
bool isTwoAddress
Is a two-address instruction.
Definition: ScheduleDAG.h:277
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:274
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
SUnit * newSUnit(SDNode *N)
NewSUnit - Creates a new SUnit and return a ptr to it.
virtual void Schedule()=0
Schedule - Order nodes according to selected style, filling in the Sequence member.
virtual bool forceUnitLatencies() const
ForceUnitLatencies - Return true if all scheduling edges should be given a latency value of one.
SUnit * Clone(SUnit *Old)
Clone - Creates a clone of the specified SUnit.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:693
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:767
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
Definition: ScheduleDAG.h:496
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:545
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:541
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:520
virtual bool tracksRegPressure() const
Definition: ScheduleDAG.h:518
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:536
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:543
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
MachineFunction * MF
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Level
Code generation optimization level.
Definition: CodeGen.h:57
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:1045
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1241
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1040
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1037
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1839
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N