1//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This implements bottom-up and top-down register pressure reduction list
10// schedulers, using standard algorithms. The basic approach uses a priority
11// queue of available nodes to schedule. One at a time, nodes are taken from
12// the priority queue (thus in priority order), checked for legality to
13// schedule, and emitted if legal.
14//
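// A rough sketch of the bottom-up loop (see ListScheduleBottomUp below); the
// real code additionally handles pending nodes, hazard checking, backtracking,
// and physical-register interferences:
//
//   while (!AvailableQueue->empty() || !Interferences.empty()) {
//     SUnit *SU = PickNodeToScheduleBottomUp(); // highest-priority legal node
//     AdvancePastStalls(SU);                    // honor latency and hazards
//     ScheduleNodeBottomUp(SU);                 // emit + release predecessors
//   }
//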
15//===----------------------------------------------------------------------===//
16
17#include "ScheduleDAGSDNodes.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/Statistic.h"
39#include "llvm/Config/llvm-config.h"
40#include "llvm/IR/InlineAsm.h"
41#include "llvm/MC/MCInstrDesc.h"
47#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <cstdlib>
54#include <iterator>
55#include <limits>
56#include <memory>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "pre-RA-sched"
63
64STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
65STATISTIC(NumUnfolds, "Number of nodes unfolded");
66STATISTIC(NumDups, "Number of duplicated nodes");
67STATISTIC(NumPRCopies, "Number of physical register copies");
68
71 "Bottom-up register reduction list scheduling",
73
76 "Similar to list-burr but schedules in source "
77 "order when possible",
79
82 "Bottom-up register pressure aware list scheduling "
83 "which tries to balance latency and register pressure",
85
88 "Bottom-up register pressure aware list scheduling "
89 "which tries to balance ILP and register pressure",
91
93 "disable-sched-cycles", cl::Hidden, cl::init(false),
94 cl::desc("Disable cycle-level precision during preRA scheduling"));
95
96// Temporary sched=list-ilp flags until the heuristics are robust.
97// Some options are also available under sched=list-hybrid.
99 "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
100 cl::desc("Disable regpressure priority in sched=list-ilp"));
102 "disable-sched-live-uses", cl::Hidden, cl::init(true),
103 cl::desc("Disable live use priority in sched=list-ilp"));
105 "disable-sched-vrcycle", cl::Hidden, cl::init(false),
106 cl::desc("Disable virtual register cycle interference checks"));
108 "disable-sched-physreg-join", cl::Hidden, cl::init(false),
109 cl::desc("Disable physreg def-use affinity"));
111 "disable-sched-stalls", cl::Hidden, cl::init(true),
112 cl::desc("Disable no-stall priority in sched=list-ilp"));
114 "disable-sched-critical-path", cl::Hidden, cl::init(false),
115 cl::desc("Disable critical path priority in sched=list-ilp"));
117 "disable-sched-height", cl::Hidden, cl::init(false),
118 cl::desc("Disable scheduled-height priority in sched=list-ilp"));
120 "disable-2addr-hack", cl::Hidden, cl::init(true),
121 cl::desc("Disable scheduler's two-address hack"));
122
124 "max-sched-reorder", cl::Hidden, cl::init(6),
125 cl::desc("Number of instructions to allow ahead of the critical path "
126 "in sched=list-ilp"));
127
129 "sched-avg-ipc", cl::Hidden, cl::init(1),
130 cl::desc("Average inst/cycle whan no target itinerary exists."));
131
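// For example (an illustrative command line, not taken from this file),
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure ...
// selects the list-ilp scheduler registered above with its register-pressure
// heuristic disabled.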
132namespace {
133
134//===----------------------------------------------------------------------===//
135/// ScheduleDAGRRList - The actual register reduction list scheduler
136/// implementation. This supports both top-down and bottom-up scheduling.
137///
138class ScheduleDAGRRList : public ScheduleDAGSDNodes {
139private:
140 /// NeedLatency - True if the scheduler will make use of latency information.
141 bool NeedLatency;
142
143 /// AvailableQueue - The priority queue to use for the available SUnits.
144 SchedulingPriorityQueue *AvailableQueue;
145
146 /// PendingQueue - This contains all of the instructions whose operands have
147 /// been issued, but their results are not ready yet (due to the latency of
148 /// the operation). Once the operands become available, the instruction is
149 /// added to the AvailableQueue.
150 std::vector<SUnit *> PendingQueue;
151
152 /// HazardRec - The hazard recognizer to use.
153 ScheduleHazardRecognizer *HazardRec;
154
155 /// CurCycle - The current scheduler state corresponds to this cycle.
156 unsigned CurCycle = 0;
157
158 /// MinAvailableCycle - Cycle of the soonest available instruction.
159 unsigned MinAvailableCycle = ~0u;
160
161 /// IssueCount - Count instructions issued in this cycle
162 /// Currently valid only for bottom-up scheduling.
163 unsigned IssueCount = 0u;
164
165 /// LiveRegDefs - A set of physical registers and their definitions
166 /// that are "live". These nodes must be scheduled before any other nodes that
167 /// modify the registers can be scheduled.
168 unsigned NumLiveRegs = 0u;
169 std::unique_ptr<SUnit*[]> LiveRegDefs;
170 std::unique_ptr<SUnit*[]> LiveRegGens;
171
172 // Collect interferences between physical register use/defs.
173 // Each interference is an SUnit and set of physical registers.
174 SmallVector<SUnit*, 4> Interferences;
175
176 using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
177
178 LRegsMapT LRegsMap;
179
180 /// Topo - A topological ordering for SUnits which permits fast IsReachable
181 /// and similar queries.
182 ScheduleDAGTopologicalSort Topo;
183
184 // Hack to keep track of the inverse of FindCallSeqStart without more crazy
185 // DAG crawling.
186 DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
187
188public:
189 ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
190 SchedulingPriorityQueue *availqueue,
191 CodeGenOptLevel OptLevel)
192 : ScheduleDAGSDNodes(mf), NeedLatency(needlatency),
193 AvailableQueue(availqueue), Topo(SUnits, nullptr) {
194 const TargetSubtargetInfo &STI = mf.getSubtarget();
195 if (DisableSchedCycles || !NeedLatency)
196 HazardRec = new ScheduleHazardRecognizer();
197 else
198 HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
199 }
200
201 ~ScheduleDAGRRList() override {
202 delete HazardRec;
203 delete AvailableQueue;
204 }
205
206 void Schedule() override;
207
208 ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
209
210 /// IsReachable - Checks if SU is reachable from TargetSU.
211 bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
212 return Topo.IsReachable(SU, TargetSU);
213 }
214
215 /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
216 /// create a cycle.
217 bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
218 return Topo.WillCreateCycle(SU, TargetSU);
219 }
220
221 /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
222 /// This returns true if this is a new predecessor.
223 /// Does *NOT* update the topological ordering! It just queues an update.
224 void AddPredQueued(SUnit *SU, const SDep &D) {
225 Topo.AddPredQueued(SU, D.getSUnit());
226 SU->addPred(D);
227 }
228
229 /// AddPred - adds a predecessor edge to SUnit SU.
230 /// This returns true if this is a new predecessor.
231 /// Updates the topological ordering if required.
232 void AddPred(SUnit *SU, const SDep &D) {
233 Topo.AddPred(SU, D.getSUnit());
234 SU->addPred(D);
235 }
236
237 /// RemovePred - removes a predecessor edge from SUnit SU.
238 /// This returns true if an edge was removed.
239 /// Updates the topological ordering if required.
240 void RemovePred(SUnit *SU, const SDep &D) {
241 Topo.RemovePred(SU, D.getSUnit());
242 SU->removePred(D);
243 }
244
245private:
246 bool isReady(SUnit *SU) {
247 return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
248 AvailableQueue->isReady(SU);
249 }
250
251 void ReleasePred(SUnit *SU, const SDep *PredEdge);
252 void ReleasePredecessors(SUnit *SU);
253 void ReleasePending();
254 void AdvanceToCycle(unsigned NextCycle);
255 void AdvancePastStalls(SUnit *SU);
256 void EmitNode(SUnit *SU);
257 void ScheduleNodeBottomUp(SUnit*);
258 void CapturePred(SDep *PredEdge);
259 void UnscheduleNodeBottomUp(SUnit*);
260 void RestoreHazardCheckerBottomUp();
261 void BacktrackBottomUp(SUnit*, SUnit*);
262 SUnit *TryUnfoldSU(SUnit *);
263 SUnit *CopyAndMoveSuccessors(SUnit*);
264 void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
265 const TargetRegisterClass*,
266 const TargetRegisterClass*,
267 SmallVectorImpl<SUnit*>&);
268 bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);
269
270 void releaseInterferences(unsigned Reg = 0);
271
272 SUnit *PickNodeToScheduleBottomUp();
273 void ListScheduleBottomUp();
274
275 /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
276 SUnit *CreateNewSUnit(SDNode *N) {
277 unsigned NumSUnits = SUnits.size();
278 SUnit *NewNode = newSUnit(N);
279 // Update the topological ordering.
280 if (NewNode->NodeNum >= NumSUnits)
281 Topo.AddSUnitWithoutPredecessors(NewNode);
282 return NewNode;
283 }
284
285 /// CreateClone - Creates a new SUnit from an existing one.
286 SUnit *CreateClone(SUnit *N) {
287 unsigned NumSUnits = SUnits.size();
288 SUnit *NewNode = Clone(N);
289 // Update the topological ordering.
290 if (NewNode->NodeNum >= NumSUnits)
291 Topo.AddSUnitWithoutPredecessors(NewNode);
292 return NewNode;
293 }
294
295 /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
296 /// need actual latency information but the hybrid scheduler does.
297 bool forceUnitLatencies() const override {
298 return !NeedLatency;
299 }
300};
301
302} // end anonymous namespace
303
304static constexpr unsigned RegSequenceCost = 1;
305
306/// GetCostForDef - Looks up the register class and cost for a given definition.
307/// Typically this just means looking up the representative register class,
308/// but for untyped values (MVT::Untyped) it means inspecting the node's
309/// opcode to determine what register class is being generated.
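/// For illustration: for a typed value the class and cost come straight from
/// TLI->getRepRegClassFor(VT) and TLI->getRepRegClassCostFor(VT); for an
/// untyped REG_SEQUENCE the class is taken from its DstRCIdx operand and the
/// cost is the fixed RegSequenceCost defined above.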
310static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
311 const TargetLowering *TLI,
312 const TargetInstrInfo *TII,
313 const TargetRegisterInfo *TRI,
314 unsigned &RegClass, unsigned &Cost,
315 const MachineFunction &MF) {
316 MVT VT = RegDefPos.GetValue();
317
318 // Special handling for untyped values. These values can only come from
319 // the expansion of custom DAG-to-DAG patterns.
320 if (VT == MVT::Untyped) {
321 const SDNode *Node = RegDefPos.GetNode();
322
323 // Special handling for CopyFromReg of untyped values.
324 if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
325 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
326 const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
327 RegClass = RC->getID();
328 Cost = 1;
329 return;
330 }
331
332 unsigned Opcode = Node->getMachineOpcode();
333 if (Opcode == TargetOpcode::REG_SEQUENCE) {
334 unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
335 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
336 RegClass = RC->getID();
337 Cost = RegSequenceCost;
338 return;
339 }
340
341 unsigned Idx = RegDefPos.GetIdx();
342 const MCInstrDesc &Desc = TII->get(Opcode);
343 const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
344 assert(RC && "Not a valid register class");
345 RegClass = RC->getID();
346 // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
347 // better way to determine it.
348 Cost = 1;
349 } else {
350 RegClass = TLI->getRepRegClassFor(VT)->getID();
351 Cost = TLI->getRepRegClassCostFor(VT);
352 }
353}
354
355/// Schedule - Schedule the DAG using list scheduling.
356void ScheduleDAGRRList::Schedule() {
357 LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
358 << " '" << BB->getName() << "' **********\n");
359
360 CurCycle = 0;
361 IssueCount = 0;
362 MinAvailableCycle =
363 DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
364 NumLiveRegs = 0;
365 // Allocate slots for each physical register, plus one for a special register
366 // to track the virtual resource of a calling sequence.
367 LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
368 LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
369 CallSeqEndForStart.clear();
370 assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
371
372 // Build the scheduling graph.
373 BuildSchedGraph(nullptr);
374
375 LLVM_DEBUG(dump());
376 Topo.MarkDirty();
377
378 AvailableQueue->initNodes(SUnits);
379
380 HazardRec->Reset();
381
382 // Execute the actual scheduling loop.
383 ListScheduleBottomUp();
384
385 AvailableQueue->releaseState();
386
387 LLVM_DEBUG({
388 dbgs() << "*** Final schedule ***\n";
389 dumpSchedule();
390 dbgs() << '\n';
391 });
392}
393
394//===----------------------------------------------------------------------===//
395// Bottom-Up Scheduling
396//===----------------------------------------------------------------------===//
397
398/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
399/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
400void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
401 SUnit *PredSU = PredEdge->getSUnit();
402
403#ifndef NDEBUG
404 if (PredSU->NumSuccsLeft == 0) {
405 dbgs() << "*** Scheduling failed! ***\n";
406 dumpNode(*PredSU);
407 dbgs() << " has been released too many times!\n";
408 llvm_unreachable(nullptr);
409 }
410#endif
411 --PredSU->NumSuccsLeft;
412
413 if (!forceUnitLatencies()) {
414 // Updating predecessor's height. This is now the cycle when the
415 // predecessor can be scheduled without causing a pipeline stall.
416 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
417 }
418
419 // If all the node's successors are scheduled, this node is ready
420 // to be scheduled. Ignore the special EntrySU node.
421 if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
422 PredSU->isAvailable = true;
423
424 unsigned Height = PredSU->getHeight();
425 if (Height < MinAvailableCycle)
426 MinAvailableCycle = Height;
427
428 if (isReady(PredSU)) {
429 AvailableQueue->push(PredSU);
430 }
431 // CapturePred and others may have left the node in the pending queue, avoid
432 // adding it twice.
433 else if (!PredSU->isPending) {
434 PredSU->isPending = true;
435 PendingQueue.push_back(PredSU);
436 }
437 }
438}
439
440/// IsChainDependent - Test if Outer is reachable from Inner through
441/// chain dependencies.
442static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
443 unsigned NestLevel,
444 const TargetInstrInfo *TII) {
445 SDNode *N = Outer;
446 while (true) {
447 if (N == Inner)
448 return true;
449 // For a TokenFactor, examine each operand. There may be multiple ways
450 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
451 // most nesting in order to ensure that we find the corresponding match.
452 if (N->getOpcode() == ISD::TokenFactor) {
453 for (const SDValue &Op : N->op_values())
454 if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
455 return true;
456 return false;
457 }
458 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
459 if (N->isMachineOpcode()) {
460 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
461 ++NestLevel;
462 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
463 if (NestLevel == 0)
464 return false;
465 --NestLevel;
466 }
467 }
468 // Otherwise, find the chain and continue climbing.
469 for (const SDValue &Op : N->op_values())
470 if (Op.getValueType() == MVT::Other) {
471 N = Op.getNode();
472 goto found_chain_operand;
473 }
474 return false;
475 found_chain_operand:;
476 if (N->getOpcode() == ISD::EntryToken)
477 return false;
478 }
479}
480
481/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
482/// the corresponding (lowered) CALLSEQ_BEGIN node.
483///
484/// NestLevel and MaxNest are used in recursion to indicate the current level
485/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
486/// level seen so far.
487///
488/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
489/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
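///
/// Illustrative walk (hypothetical nesting, not taken from the code): starting
/// at an outer CALLSEQ_END with NestLevel = 0, the END itself and any nested
/// END raise the level, each CALLSEQ_BEGIN lowers it, and the BEGIN that
/// brings the level back to zero is the match that gets returned.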
490static SDNode *
491FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
492 const TargetInstrInfo *TII) {
493 while (true) {
494 // For a TokenFactor, examine each operand. There may be multiple ways
495 // to get to the CALLSEQ_BEGIN, but we need to find the path with the
496 // most nesting in order to ensure that we find the corresponding match.
497 if (N->getOpcode() == ISD::TokenFactor) {
498 SDNode *Best = nullptr;
499 unsigned BestMaxNest = MaxNest;
500 for (const SDValue &Op : N->op_values()) {
501 unsigned MyNestLevel = NestLevel;
502 unsigned MyMaxNest = MaxNest;
503 if (SDNode *New = FindCallSeqStart(Op.getNode(),
504 MyNestLevel, MyMaxNest, TII))
505 if (!Best || (MyMaxNest > BestMaxNest)) {
506 Best = New;
507 BestMaxNest = MyMaxNest;
508 }
509 }
510 assert(Best);
511 MaxNest = BestMaxNest;
512 return Best;
513 }
514 // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
515 if (N->isMachineOpcode()) {
516 if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
517 ++NestLevel;
518 MaxNest = std::max(MaxNest, NestLevel);
519 } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
520 assert(NestLevel != 0);
521 --NestLevel;
522 if (NestLevel == 0)
523 return N;
524 }
525 }
526 // Otherwise, find the chain and continue climbing.
527 for (const SDValue &Op : N->op_values())
528 if (Op.getValueType() == MVT::Other) {
529 N = Op.getNode();
530 goto found_chain_operand;
531 }
532 return nullptr;
533 found_chain_operand:;
534 if (N->getOpcode() == ISD::EntryToken)
535 return nullptr;
536 }
537}
538
539/// Call ReleasePred for each predecessor, then update register live def/gen.
540/// Always update LiveRegDefs for a register dependence even if the current SU
541/// also defines the register. This effectively creates one large live range
542/// across a sequence of two-address nodes. This is important because the
543/// entire chain must be scheduled together. Example:
544///
545/// flags = (3) add
546/// flags = (2) addc flags
547/// flags = (1) addc flags
548///
549/// results in
550///
551/// LiveRegDefs[flags] = 3
552/// LiveRegGens[flags] = 1
553///
554/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
555/// interference on flags.
556void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
557 // Bottom up: release predecessors
558 for (SDep &Pred : SU->Preds) {
559 ReleasePred(SU, &Pred);
560 if (Pred.isAssignedRegDep()) {
561 // This is a physical register dependency and it's impossible or
562 // expensive to copy the register. Make sure nothing that can
563 // clobber the register is scheduled between the predecessor and
564 // this node.
565 SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
566 assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
567 "interference on register dependence");
568 LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
569 if (!LiveRegGens[Pred.getReg()]) {
570 ++NumLiveRegs;
571 LiveRegGens[Pred.getReg()] = SU;
572 }
573 }
574 }
575
576 // If we're scheduling a lowered CALLSEQ_END, find the corresponding
577 // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
578 // these nodes, to prevent other calls from being interscheduled with them.
579 unsigned CallResource = TRI->getNumRegs();
580 if (!LiveRegDefs[CallResource])
581 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
582 if (Node->isMachineOpcode() &&
583 Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
584 unsigned NestLevel = 0;
585 unsigned MaxNest = 0;
586 SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
587 assert(N && "Must find call sequence start");
588
589 SUnit *Def = &SUnits[N->getNodeId()];
590 CallSeqEndForStart[Def] = SU;
591
592 ++NumLiveRegs;
593 LiveRegDefs[CallResource] = Def;
594 LiveRegGens[CallResource] = SU;
595 break;
596 }
597}
598
599/// Check to see if any of the pending instructions are ready to issue. If
600/// so, add them to the available queue.
601void ScheduleDAGRRList::ReleasePending() {
602 if (DisableSchedCycles) {
603 assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
604 return;
605 }
606
607 // If the available queue is empty, it is safe to reset MinAvailableCycle.
608 if (AvailableQueue->empty())
609 MinAvailableCycle = std::numeric_limits<unsigned>::max();
610
611 // Check to see if any of the pending instructions are ready to issue. If
612 // so, add them to the available queue.
613 for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
614 unsigned ReadyCycle = PendingQueue[i]->getHeight();
615 if (ReadyCycle < MinAvailableCycle)
616 MinAvailableCycle = ReadyCycle;
617
618 if (PendingQueue[i]->isAvailable) {
619 if (!isReady(PendingQueue[i]))
620 continue;
621 AvailableQueue->push(PendingQueue[i]);
622 }
623 PendingQueue[i]->isPending = false;
624 PendingQueue[i] = PendingQueue.back();
625 PendingQueue.pop_back();
626 --i; --e;
627 }
628}
629
630/// Move the scheduler state forward by the specified number of Cycles.
631void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
632 if (NextCycle <= CurCycle)
633 return;
634
635 IssueCount = 0;
636 AvailableQueue->setCurCycle(NextCycle);
637 if (!HazardRec->isEnabled()) {
638 // Bypass lots of virtual calls in case of long latency.
639 CurCycle = NextCycle;
640 }
641 else {
642 for (; CurCycle != NextCycle; ++CurCycle) {
643 HazardRec->RecedeCycle();
644 }
645 }
646 // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
647 // available Q to release pending nodes at least once before popping.
648 ReleasePending();
649}
650
651/// Move the scheduler state forward until the specified node's dependents are
652/// ready and can be scheduled with no resource conflicts.
653void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
654 if (DisableSchedCycles)
655 return;
656
657 // FIXME: Nodes such as CopyFromReg probably should not advance the current
658 // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
659 // has predecessors the cycle will be advanced when they are scheduled.
660 // But given the crude nature of modeling latency through such nodes, we
661 // currently need to treat these nodes like real instructions.
662 // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
663
664 unsigned ReadyCycle = SU->getHeight();
665
666 // Bump CurCycle to account for latency. We assume the latency of other
667 // available instructions may be hidden by the stall (not a full pipe stall).
668 // This updates the hazard recognizer's cycle before reserving resources for
669 // this instruction.
670 AdvanceToCycle(ReadyCycle);
671
672 // Calls are scheduled in their preceding cycle, so don't conflict with
673 // hazards from instructions after the call. EmitNode will reset the
674 // scoreboard state before emitting the call.
675 if (SU->isCall)
676 return;
677
678 // FIXME: For resource conflicts in very long non-pipelined stages, we
679 // should probably skip ahead here to avoid useless scoreboard checks.
680 int Stalls = 0;
681 while (true) {
682 ScheduleHazardRecognizer::HazardType HT =
683 HazardRec->getHazardType(SU, -Stalls);
684
685 if (HT == ScheduleHazardRecognizer::NoHazard)
686 break;
687
688 ++Stalls;
689 }
690 AdvanceToCycle(CurCycle + Stalls);
691}
692
693/// Record this SUnit in the HazardRecognizer.
694/// Does not update CurCycle.
695void ScheduleDAGRRList::EmitNode(SUnit *SU) {
696 if (!HazardRec->isEnabled())
697 return;
698
699 // Check for phys reg copy.
700 if (!SU->getNode())
701 return;
702
703 switch (SU->getNode()->getOpcode()) {
704 default:
705 assert(SU->getNode()->isMachineOpcode() &&
706 "This target-independent node should not be scheduled.");
707 break;
708 case ISD::MergeValues:
709 case ISD::TokenFactor:
710 case ISD::LIFETIME_START:
711 case ISD::LIFETIME_END:
712 case ISD::CopyToReg:
713 case ISD::CopyFromReg:
714 case ISD::EH_LABEL:
715 // Noops don't affect the scoreboard state. Copies are likely to be
716 // removed.
717 return;
718 case ISD::INLINEASM:
719 case ISD::INLINEASM_BR:
720 // For inline asm, clear the pipeline state.
721 HazardRec->Reset();
722 return;
723 }
724 if (SU->isCall) {
725 // Calls are scheduled with their preceding instructions. For bottom-up
726 // scheduling, clear the pipeline state before emitting.
727 HazardRec->Reset();
728 }
729
730 HazardRec->EmitInstruction(SU);
731}
732
733static void resetVRegCycle(SUnit *SU);
734
735/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
736/// count of its predecessors. If a predecessor pending count is zero, add it to
737/// the Available queue.
738void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
739 LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
740 LLVM_DEBUG(dumpNode(*SU));
741
742#ifndef NDEBUG
743 if (CurCycle < SU->getHeight())
744 LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight()
745 << "] pipeline stall!\n");
746#endif
747
748 // FIXME: Do not modify node height. It may interfere with
749 // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
750 // node its ready cycle can aid heuristics, and after scheduling it can
751 // indicate the scheduled cycle.
752 SU->setHeightToAtLeast(CurCycle);
753
754 // Reserve resources for the scheduled instruction.
755 EmitNode(SU);
756
757 Sequence.push_back(SU);
758
759 AvailableQueue->scheduledNode(SU);
760
761 // If HazardRec is disabled, and each inst counts as one cycle, then
762 // advance CurCycle before ReleasePredecessors to avoid useless pushes to
763 // PendingQueue for schedulers that implement HasReadyFilter.
764 if (!HazardRec->isEnabled() && AvgIPC < 2)
765 AdvanceToCycle(CurCycle + 1);
766
767 // Update liveness of predecessors before successors to avoid treating a
768 // two-address node as a live range def.
769 ReleasePredecessors(SU);
770
771 // Release all the implicit physical register defs that are live.
772 for (SDep &Succ : SU->Succs) {
773 // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
774 if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
775 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
776 --NumLiveRegs;
777 LiveRegDefs[Succ.getReg()] = nullptr;
778 LiveRegGens[Succ.getReg()] = nullptr;
779 releaseInterferences(Succ.getReg());
780 }
781 }
782 // Release the special call resource dependence, if this is the beginning
783 // of a call.
784 unsigned CallResource = TRI->getNumRegs();
785 if (LiveRegDefs[CallResource] == SU)
786 for (const SDNode *SUNode = SU->getNode(); SUNode;
787 SUNode = SUNode->getGluedNode()) {
788 if (SUNode->isMachineOpcode() &&
789 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
790 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
791 --NumLiveRegs;
792 LiveRegDefs[CallResource] = nullptr;
793 LiveRegGens[CallResource] = nullptr;
794 releaseInterferences(CallResource);
795 }
796 }
797
798 resetVRegCycle(SU);
799
800 SU->isScheduled = true;
801
802 // Conditions under which the scheduler should eagerly advance the cycle:
803 // (1) No available instructions
804 // (2) All pipelines full, so available instructions must have hazards.
805 //
806 // If HazardRec is disabled, the cycle was pre-advanced before calling
807 // ReleasePredecessors. In that case, IssueCount should remain 0.
808 //
809 // Check AvailableQueue after ReleasePredecessors in case of zero latency.
810 if (HazardRec->isEnabled() || AvgIPC > 1) {
811 if (SU->getNode() && SU->getNode()->isMachineOpcode())
812 ++IssueCount;
813 if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
814 || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
815 AdvanceToCycle(CurCycle + 1);
816 }
817}
818
819/// CapturePred - This does the opposite of ReleasePred. Since SU is being
820/// unscheduled, increase the succ left count of its predecessors. Remove
821/// them from AvailableQueue if necessary.
822void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
823 SUnit *PredSU = PredEdge->getSUnit();
824 if (PredSU->isAvailable) {
825 PredSU->isAvailable = false;
826 if (!PredSU->isPending)
827 AvailableQueue->remove(PredSU);
828 }
829
830 assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
831 "NumSuccsLeft will overflow!");
832 ++PredSU->NumSuccsLeft;
833}
834
835/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update
836/// its state and its predecessors' states to reflect the change.
837void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
838 LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
839 LLVM_DEBUG(dumpNode(*SU));
840
841 for (SDep &Pred : SU->Preds) {
842 CapturePred(&Pred);
843 if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]){
844 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
845 assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
846 "Physical register dependency violated?");
847 --NumLiveRegs;
848 LiveRegDefs[Pred.getReg()] = nullptr;
849 LiveRegGens[Pred.getReg()] = nullptr;
850 releaseInterferences(Pred.getReg());
851 }
852 }
853
854 // Reclaim the special call resource dependence, if this is the beginning
855 // of a call.
856 unsigned CallResource = TRI->getNumRegs();
857 for (const SDNode *SUNode = SU->getNode(); SUNode;
858 SUNode = SUNode->getGluedNode()) {
859 if (SUNode->isMachineOpcode() &&
860 SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
861 SUnit *SeqEnd = CallSeqEndForStart[SU];
862 assert(SeqEnd && "Call sequence start/end must be known");
863 assert(!LiveRegDefs[CallResource]);
864 assert(!LiveRegGens[CallResource]);
865 ++NumLiveRegs;
866 LiveRegDefs[CallResource] = SU;
867 LiveRegGens[CallResource] = SeqEnd;
868 }
869 }
870
871 // Release the special call resource dependence, if this is the end
872 // of a call.
873 if (LiveRegGens[CallResource] == SU)
874 for (const SDNode *SUNode = SU->getNode(); SUNode;
875 SUNode = SUNode->getGluedNode()) {
876 if (SUNode->isMachineOpcode() &&
877 SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
878 assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
879 assert(LiveRegDefs[CallResource]);
880 assert(LiveRegGens[CallResource]);
881 --NumLiveRegs;
882 LiveRegDefs[CallResource] = nullptr;
883 LiveRegGens[CallResource] = nullptr;
884 releaseInterferences(CallResource);
885 }
886 }
887
888 for (auto &Succ : SU->Succs) {
889 if (Succ.isAssignedRegDep()) {
890 auto Reg = Succ.getReg();
891 if (!LiveRegDefs[Reg])
892 ++NumLiveRegs;
893 // This becomes the nearest def. Note that an earlier def may still be
894 // pending if this is a two-address node.
895 LiveRegDefs[Reg] = SU;
896
897 // Update LiveRegGens only if it was empty before this unscheduling.
898 // This avoids incorrectly overwriting an entry set in a previous run.
899 if (!LiveRegGens[Reg]) {
900 // Find the successor with the lowest height.
901 LiveRegGens[Reg] = Succ.getSUnit();
902 for (auto &Succ2 : SU->Succs) {
903 if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
904 Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
905 LiveRegGens[Reg] = Succ2.getSUnit();
906 }
907 }
908 }
909 }
910 if (SU->getHeight() < MinAvailableCycle)
911 MinAvailableCycle = SU->getHeight();
912
913 SU->setHeightDirty();
914 SU->isScheduled = false;
915 SU->isAvailable = true;
916 if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
917 // Don't make available until backtracking is complete.
918 SU->isPending = true;
919 PendingQueue.push_back(SU);
920 }
921 else {
922 AvailableQueue->push(SU);
923 }
924 AvailableQueue->unscheduledNode(SU);
925}
926
927/// After backtracking, the hazard checker needs to be restored to a state
928/// corresponding to the current cycle.
929void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
930 HazardRec->Reset();
931
932 unsigned LookAhead = std::min((unsigned)Sequence.size(),
933 HazardRec->getMaxLookAhead());
934 if (LookAhead == 0)
935 return;
936
937 std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
938 unsigned HazardCycle = (*I)->getHeight();
939 for (auto E = Sequence.end(); I != E; ++I) {
940 SUnit *SU = *I;
941 for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
942 HazardRec->RecedeCycle();
943 }
944 EmitNode(SU);
945 }
946}
947
948/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
949/// BTCycle in order to schedule a specific node.
950void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
951 SUnit *OldSU = Sequence.back();
952 while (true) {
953 Sequence.pop_back();
954 // FIXME: use ready cycle instead of height
955 CurCycle = OldSU->getHeight();
956 UnscheduleNodeBottomUp(OldSU);
957 AvailableQueue->setCurCycle(CurCycle);
958 if (OldSU == BtSU)
959 break;
960 OldSU = Sequence.back();
961 }
962
963 assert(!SU->isSucc(OldSU) && "Something is wrong!");
964
965 RestoreHazardCheckerBottomUp();
966
967 ReleasePending();
968
969 ++NumBacktracks;
970}
971
972static bool isOperandOf(const SUnit *SU, SDNode *N) {
973 for (const SDNode *SUNode = SU->getNode(); SUNode;
974 SUNode = SUNode->getGluedNode()) {
975 if (SUNode->isOperandOf(N))
976 return true;
977 }
978 return false;
979}
980
981/// TryUnfoldSU - Attempt to unfold the given node into a separate load and operation.
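/// A typical case (illustrative and target-specific, not from this file): an
/// x86 instruction with a folded load such as ADD32rm is split back into a
/// MOV32rm feeding an ADD32rr, so the load and the arithmetic can be
/// scheduled independently.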
982SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
983 SDNode *N = SU->getNode();
984 // Use while over if to ease fall through.
985 SmallVector<SDNode*, 2> NewNodes;
986 if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
987 return nullptr;
988
989 // Unfolding an x86 DEC64m operation results in a store, dec, load sequence,
990 // which can't be handled here, so quit.
991 if (NewNodes.size() == 3)
992 return nullptr;
993
994 assert(NewNodes.size() == 2 && "Expected a load folding node!");
995
996 N = NewNodes[1];
997 SDNode *LoadNode = NewNodes[0];
998 unsigned NumVals = N->getNumValues();
999 unsigned OldNumVals = SU->getNode()->getNumValues();
1000
1001 // LoadNode may already exist. This can happen when there is another
1002 // load from the same location that produces the same type of value
1003 // but has different alignment or volatility.
1004 bool isNewLoad = true;
1005 SUnit *LoadSU;
1006 if (LoadNode->getNodeId() != -1) {
1007 LoadSU = &SUnits[LoadNode->getNodeId()];
1008 // If LoadSU has already been scheduled, we should clone it but
1009 // this would negate the benefit to unfolding so just return SU.
1010 if (LoadSU->isScheduled)
1011 return SU;
1012 isNewLoad = false;
1013 } else {
1014 LoadSU = CreateNewSUnit(LoadNode);
1015 LoadNode->setNodeId(LoadSU->NodeNum);
1016
1017 InitNumRegDefsLeft(LoadSU);
1018 computeLatency(LoadSU);
1019 }
1020
1021 bool isNewN = true;
1022 SUnit *NewSU;
1023 // This can only happen when isNewLoad is false.
1024 if (N->getNodeId() != -1) {
1025 NewSU = &SUnits[N->getNodeId()];
1026 // If NewSU has already been scheduled, we need to clone it, but this
1027 // negates the benefit to unfolding so just return SU.
1028 if (NewSU->isScheduled) {
1029 return SU;
1030 }
1031 isNewN = false;
1032 } else {
1033 NewSU = CreateNewSUnit(N);
1034 N->setNodeId(NewSU->NodeNum);
1035
1036 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1037 for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
1038 if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
1039 NewSU->isTwoAddress = true;
1040 break;
1041 }
1042 }
1043 if (MCID.isCommutable())
1044 NewSU->isCommutable = true;
1045
1046 InitNumRegDefsLeft(NewSU);
1047 computeLatency(NewSU);
1048 }
1049
1050 LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
1051
1052 // Now that we are committed to unfolding replace DAG Uses.
1053 for (unsigned i = 0; i != NumVals; ++i)
1054 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
1055 DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
1056 SDValue(LoadNode, 1));
1057
1058 // Record all the edges to and from the old SU, by category.
1059 SmallVector<SDep, 4> ChainPreds;
1060 SmallVector<SDep, 4> ChainSuccs;
1061 SmallVector<SDep, 4> LoadPreds;
1062 SmallVector<SDep, 4> NodePreds;
1063 SmallVector<SDep, 4> NodeSuccs;
1064 for (SDep &Pred : SU->Preds) {
1065 if (Pred.isCtrl())
1066 ChainPreds.push_back(Pred);
1067 else if (isOperandOf(Pred.getSUnit(), LoadNode))
1068 LoadPreds.push_back(Pred);
1069 else
1070 NodePreds.push_back(Pred);
1071 }
1072 for (SDep &Succ : SU->Succs) {
1073 if (Succ.isCtrl())
1074 ChainSuccs.push_back(Succ);
1075 else
1076 NodeSuccs.push_back(Succ);
1077 }
1078
1079 // Now assign edges to the newly-created nodes.
1080 for (const SDep &Pred : ChainPreds) {
1081 RemovePred(SU, Pred);
1082 if (isNewLoad)
1083 AddPredQueued(LoadSU, Pred);
1084 }
1085 for (const SDep &Pred : LoadPreds) {
1086 RemovePred(SU, Pred);
1087 if (isNewLoad)
1088 AddPredQueued(LoadSU, Pred);
1089 }
1090 for (const SDep &Pred : NodePreds) {
1091 RemovePred(SU, Pred);
1092 AddPredQueued(NewSU, Pred);
1093 }
1094 for (SDep &D : NodeSuccs) {
1095 SUnit *SuccDep = D.getSUnit();
1096 D.setSUnit(SU);
1097 RemovePred(SuccDep, D);
1098 D.setSUnit(NewSU);
1099 AddPredQueued(SuccDep, D);
1100 // Balance register pressure.
1101 if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
1102 !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
1103 --NewSU->NumRegDefsLeft;
1104 }
1105 for (SDep &D : ChainSuccs) {
1106 SUnit *SuccDep = D.getSUnit();
1107 D.setSUnit(SU);
1108 RemovePred(SuccDep, D);
1109 if (isNewLoad) {
1110 D.setSUnit(LoadSU);
1111 AddPredQueued(SuccDep, D);
1112 }
1113 }
1114
1115 // Add a data dependency to reflect that NewSU reads the value defined
1116 // by LoadSU.
1117 SDep D(LoadSU, SDep::Data, 0);
1118 D.setLatency(LoadSU->Latency);
1119 AddPredQueued(NewSU, D);
1120
1121 if (isNewLoad)
1122 AvailableQueue->addNode(LoadSU);
1123 if (isNewN)
1124 AvailableQueue->addNode(NewSU);
1125
1126 ++NumUnfolds;
1127
1128 if (NewSU->NumSuccsLeft == 0)
1129 NewSU->isAvailable = true;
1130
1131 return NewSU;
1132}
1133
1134/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
1135/// successors to the newly created node.
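/// Used by PickNodeToScheduleBottomUp as a fallback when backtracking cannot
/// resolve a live physical-register interference: the already-scheduled users
/// are rewired to the clone, which breaks the conflict for the remaining ones.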
1136SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
1137 SDNode *N = SU->getNode();
1138 if (!N)
1139 return nullptr;
1140
1141 LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
1142 LLVM_DEBUG(dumpNode(*SU));
1143
1144 if (N->getGluedNode() &&
1145 !TII->canCopyGluedNodeDuringSchedule(N)) {
1146 LLVM_DEBUG(
1147 dbgs()
1148 << "Giving up because it has incoming glue and the target does not "
1149 "want to copy it\n");
1150 return nullptr;
1151 }
1152
1153 SUnit *NewSU;
1154 bool TryUnfold = false;
1155 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1156 MVT VT = N->getSimpleValueType(i);
1157 if (VT == MVT::Glue) {
1158 LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
1159 return nullptr;
1160 } else if (VT == MVT::Other)
1161 TryUnfold = true;
1162 }
1163 for (const SDValue &Op : N->op_values()) {
1164 MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
1165 if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
1166 LLVM_DEBUG(
1167 dbgs() << "Giving up because it one of the operands is glue and "
1168 "the target does not want to copy it\n");
1169 return nullptr;
1170 }
1171 }
1172
1173 // If possible unfold instruction.
1174 if (TryUnfold) {
1175 SUnit *UnfoldSU = TryUnfoldSU(SU);
1176 if (!UnfoldSU)
1177 return nullptr;
1178 SU = UnfoldSU;
1179 N = SU->getNode();
1180 // If this can be scheduled don't bother duplicating and just return
1181 if (SU->NumSuccsLeft == 0)
1182 return SU;
1183 }
1184
1185 LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
1186 NewSU = CreateClone(SU);
1187
1188 // New SUnit has the exact same predecessors.
1189 for (SDep &Pred : SU->Preds)
1190 if (!Pred.isArtificial())
1191 AddPredQueued(NewSU, Pred);
1192
1193 // Make sure the clone comes after the original. (InstrEmitter assumes
1194 // this ordering.)
1195 AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
1196
1197 // Only copy scheduled successors. Cut them from old node's successor
1198 // list and move them over.
1199 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1200 for (SDep &Succ : SU->Succs) {
1201 if (Succ.isArtificial())
1202 continue;
1203 SUnit *SuccSU = Succ.getSUnit();
1204 if (SuccSU->isScheduled) {
1205 SDep D = Succ;
1206 D.setSUnit(NewSU);
1207 AddPredQueued(SuccSU, D);
1208 D.setSUnit(SU);
1209 DelDeps.emplace_back(SuccSU, D);
1210 }
1211 }
1212 for (const auto &[DelSU, DelD] : DelDeps)
1213 RemovePred(DelSU, DelD);
1214
1215 AvailableQueue->updateNode(SU);
1216 AvailableQueue->addNode(NewSU);
1217
1218 ++NumDups;
1219 return NewSU;
1220}
1221
1222/// InsertCopiesAndMoveSuccs - Insert register copies and move all
1223/// scheduled successors of the given SUnit to the last copy.
1224void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
1225 const TargetRegisterClass *DestRC,
1226 const TargetRegisterClass *SrcRC,
1227 SmallVectorImpl<SUnit*> &Copies) {
1228 SUnit *CopyFromSU = CreateNewSUnit(nullptr);
1229 CopyFromSU->CopySrcRC = SrcRC;
1230 CopyFromSU->CopyDstRC = DestRC;
1231
1232 SUnit *CopyToSU = CreateNewSUnit(nullptr);
1233 CopyToSU->CopySrcRC = DestRC;
1234 CopyToSU->CopyDstRC = SrcRC;
1235
1236 // Only copy scheduled successors. Cut them from old node's successor
1237 // list and move them over.
1238 SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
1239 for (SDep &Succ : SU->Succs) {
1240 if (Succ.isArtificial())
1241 continue;
1242 SUnit *SuccSU = Succ.getSUnit();
1243 if (SuccSU->isScheduled) {
1244 SDep D = Succ;
1245 D.setSUnit(CopyToSU);
1246 AddPredQueued(SuccSU, D);
1247 DelDeps.emplace_back(SuccSU, Succ);
1248 }
1249 else {
1250 // Avoid scheduling the def-side copy before other successors. Otherwise,
1251 // we could introduce another physreg interference on the copy and
1252 // continue inserting copies indefinitely.
1253 AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
1254 }
1255 }
1256 for (const auto &[DelSU, DelD] : DelDeps)
1257 RemovePred(DelSU, DelD);
1258
1259 SDep FromDep(SU, SDep::Data, Reg);
1260 FromDep.setLatency(SU->Latency);
1261 AddPredQueued(CopyFromSU, FromDep);
1262 SDep ToDep(CopyFromSU, SDep::Data, 0);
1263 ToDep.setLatency(CopyFromSU->Latency);
1264 AddPredQueued(CopyToSU, ToDep);
1265
1266 AvailableQueue->updateNode(SU);
1267 AvailableQueue->addNode(CopyFromSU);
1268 AvailableQueue->addNode(CopyToSU);
1269 Copies.push_back(CopyFromSU);
1270 Copies.push_back(CopyToSU);
1271
1272 ++NumPRCopies;
1273}
1274
1275/// getPhysicalRegisterVT - Returns the ValueType of the physical register
1276/// definition of the specified node.
1277/// FIXME: Move to SelectionDAG?
1278static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
1279 const TargetInstrInfo *TII) {
1280 unsigned NumRes;
1281 if (N->getOpcode() == ISD::CopyFromReg) {
1282 // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
1283 NumRes = 1;
1284 } else {
1285 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1286 assert(!MCID.implicit_defs().empty() &&
1287 "Physical reg def must be in implicit def list!");
1288 NumRes = MCID.getNumDefs();
1289 for (MCPhysReg ImpDef : MCID.implicit_defs()) {
1290 if (Reg == ImpDef)
1291 break;
1292 ++NumRes;
1293 }
1294 }
1295 return N->getSimpleValueType(NumRes);
1296}
1297
1298/// CheckForLiveRegDef - Update the live register vector if the specified
1299/// register def of the specified SUnit clobbers any "live" registers.
1300static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, SUnit **LiveRegDefs,
1301 SmallSet<unsigned, 4> &RegAdded,
1302 SmallVectorImpl<unsigned> &LRegs,
1303 const TargetRegisterInfo *TRI,
1304 const SDNode *Node = nullptr) {
1305 for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {
1306
1307 // Check if Reg is live.
1308 if (!LiveRegDefs[*AliasI]) continue;
1309
1310 // Allow multiple uses of the same def.
1311 if (LiveRegDefs[*AliasI] == SU) continue;
1312
1313 // Allow multiple uses of same def
1314 if (Node && LiveRegDefs[*AliasI]->getNode() == Node)
1315 continue;
1316
1317 // Add Reg to the set of interfering live regs.
1318 if (RegAdded.insert(*AliasI).second) {
1319 LRegs.push_back(*AliasI);
1320 }
1321 }
1322}
1323
1324/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
1325/// by RegMask, and add them to LRegs.
1326static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
1327 ArrayRef<SUnit*> LiveRegDefs,
1328 SmallSet<unsigned, 4> &RegAdded,
1329 SmallVectorImpl<unsigned> &LRegs) {
1330 // Look at all live registers. Skip Reg0 and the special CallResource.
1331 for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
1332 if (!LiveRegDefs[i]) continue;
1333 if (LiveRegDefs[i] == SU) continue;
1334 if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
1335 if (RegAdded.insert(i).second)
1336 LRegs.push_back(i);
1337 }
1338}
1339
1340/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
1341static const uint32_t *getNodeRegMask(const SDNode *N) {
1342 for (const SDValue &Op : N->op_values())
1343 if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
1344 return RegOp->getRegMask();
1345 return nullptr;
1346}
1347
1348/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
1349/// scheduling of the given node to satisfy live physical register dependencies.
1350/// If the specific node is the last one that's available to schedule, do
1351/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
1352bool ScheduleDAGRRList::
1353DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
1354 if (NumLiveRegs == 0)
1355 return false;
1356
1357 SmallSet<unsigned, 4> RegAdded;
1358 // If this node would clobber any "live" register, then it's not ready.
1359 //
1360 // If SU is the currently live definition of the same register that it uses,
1361 // then we are free to schedule it.
1362 for (SDep &Pred : SU->Preds) {
1363 if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
1364 CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
1365 RegAdded, LRegs, TRI);
1366 }
1367
1368 for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
1369 if (Node->getOpcode() == ISD::INLINEASM ||
1370 Node->getOpcode() == ISD::INLINEASM_BR) {
1371 // Inline asm can clobber physical defs.
1372 unsigned NumOps = Node->getNumOperands();
1373 if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
1374 --NumOps; // Ignore the glue operand.
1375
1376 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
1377 unsigned Flags =
1378 cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
1379 const InlineAsm::Flag F(Flags);
1380 unsigned NumVals = F.getNumOperandRegisters();
1381
1382 ++i; // Skip the ID value.
1383 if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() ||
1384 F.isClobberKind()) {
1385 // Check for def of register or earlyclobber register.
1386 for (; NumVals; --NumVals, ++i) {
1387 Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
1388 if (Reg.isPhysical())
1389 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1390 }
1391 } else
1392 i += NumVals;
1393 }
1394 continue;
1395 }
1396
1397 if (Node->getOpcode() == ISD::CopyToReg) {
1398 Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
1399 if (Reg.isPhysical()) {
1400 SDNode *SrcNode = Node->getOperand(2).getNode();
1401 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI,
1402 SrcNode);
1403 }
1404 }
1405
1406 if (!Node->isMachineOpcode())
1407 continue;
1408 // If we're in the middle of scheduling a call, don't begin scheduling
1409 // another call. Also, don't allow any physical registers to be live across
1410 // the call.
1411 if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
1412 // Check the special calling-sequence resource.
1413 unsigned CallResource = TRI->getNumRegs();
1414 if (LiveRegDefs[CallResource]) {
1415 SDNode *Gen = LiveRegGens[CallResource]->getNode();
1416 while (SDNode *Glued = Gen->getGluedNode())
1417 Gen = Glued;
1418 if (!IsChainDependent(Gen, Node, 0, TII) &&
1419 RegAdded.insert(CallResource).second)
1420 LRegs.push_back(CallResource);
1421 }
1422 }
1423 if (const uint32_t *RegMask = getNodeRegMask(Node))
1424 CheckForLiveRegDefMasked(SU, RegMask,
1425 ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
1426 RegAdded, LRegs);
1427
1428 const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
1429 if (MCID.hasOptionalDef()) {
1430 // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
1431 // This operand can be either a def of CPSR, if the S bit is set; or a use
1432 // of %noreg. When the OptionalDef is set to a valid register, we need to
1433 // handle it in the same way as an ImplicitDef.
1434 for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
1435 if (MCID.operands()[i].isOptionalDef()) {
1436 const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
1437 Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
1438 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1439 }
1440 }
1441 for (MCPhysReg Reg : MCID.implicit_defs())
1442 CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
1443 }
1444
1445 return !LRegs.empty();
1446}
1447
1448void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
1449 // Add the nodes that aren't ready back onto the available list.
1450 for (unsigned i = Interferences.size(); i > 0; --i) {
1451 SUnit *SU = Interferences[i-1];
1452 LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
1453 if (Reg) {
1454 SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
1455 if (!is_contained(LRegs, Reg))
1456 continue;
1457 }
1458 SU->isPending = false;
1459 // The interfering node may no longer be available due to backtracking.
1460 // Furthermore, it may have been made available again, in which case it is
1461 // now already in the AvailableQueue.
1462 if (SU->isAvailable && !SU->NodeQueueId) {
1463 LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n');
1464 AvailableQueue->push(SU);
1465 }
1466 if (i < Interferences.size())
1467 Interferences[i-1] = Interferences.back();
1468 Interferences.pop_back();
1469 LRegsMap.erase(LRegsPos);
1470 }
1471}
1472
1473/// Return a node that can be scheduled in this cycle. Requirements:
1474/// (1) Ready: latency has been satisfied
1475/// (2) No Hazards: resources are available
1476/// (3) No Interferences: may unschedule to break register interferences.
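///
/// When every candidate is blocked by a live physical-register interference,
/// the fallbacks below are tried in order: backtrack to the interfering def's
/// generator, duplicate the defining node (CopyAndMoveSuccessors), and finally
/// insert cross-register-class copies (InsertCopiesAndMoveSuccs).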
1477SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
1478 SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
1479 auto FindAvailableNode = [&]() {
1480 while (CurSU) {
1481 SmallVector<unsigned, 4> LRegs;
1482 if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
1483 break;
1484 LLVM_DEBUG(dbgs() << " Interfering reg ";
1485 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
1486 else dbgs() << printReg(LRegs[0], TRI);
1487 dbgs() << " SU #" << CurSU->NodeNum << '\n');
1488 auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs);
1489 if (LRegsInserted) {
1490 CurSU->isPending = true; // This SU is not in AvailableQueue right now.
1491 Interferences.push_back(CurSU);
1492 }
1493 else {
1494 assert(CurSU->isPending && "Interferences are pending");
1495 // Update the interference with current live regs.
1496 LRegsIter->second = LRegs;
1497 }
1498 CurSU = AvailableQueue->pop();
1499 }
1500 };
1501 FindAvailableNode();
1502 if (CurSU)
1503 return CurSU;
1504
1505 // We query the topological order in the loop body, so make sure outstanding
1506 // updates are applied before entering it (we only enter the loop if there
1507 // are some interferences). If we make changes to the ordering, we exit
1508 // the loop.
1509
1510 // All candidates are delayed due to live physical reg dependencies.
1511 // Try backtracking, code duplication, or inserting cross class copies
1512 // to resolve it.
1513 for (SUnit *TrySU : Interferences) {
1514 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1515
1516 // Try unscheduling up to the point where it's safe to schedule
1517 // this node.
1518 SUnit *BtSU = nullptr;
1519 unsigned LiveCycle = std::numeric_limits<unsigned>::max();
1520 for (unsigned Reg : LRegs) {
1521 if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
1522 BtSU = LiveRegGens[Reg];
1523 LiveCycle = BtSU->getHeight();
1524 }
1525 }
1526 if (!WillCreateCycle(TrySU, BtSU)) {
1527 // BacktrackBottomUp mutates Interferences!
1528 BacktrackBottomUp(TrySU, BtSU);
1529
1530 // Force the current node to be scheduled before the node that
1531 // requires the physical reg dep.
1532 if (BtSU->isAvailable) {
1533 BtSU->isAvailable = false;
1534 if (!BtSU->isPending)
1535 AvailableQueue->remove(BtSU);
1536 }
1537 LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
1538 << ") to SU(" << TrySU->NodeNum << ")\n");
1539 AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
1540
1541 // If one or more successors has been unscheduled, then the current
1542 // node is no longer available.
1543 if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
1544 LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
1545 CurSU = AvailableQueue->pop();
1546 } else {
1547 LLVM_DEBUG(dbgs() << "TrySU available\n");
1548 // Available and in AvailableQueue
1549 AvailableQueue->remove(TrySU);
1550 CurSU = TrySU;
1551 }
1552 FindAvailableNode();
1553 // Interferences has been mutated. We must break.
1554 break;
1555 }
1556 }
1557
1558 if (!CurSU) {
1559 // Can't backtrack. If it's too expensive to copy the value, then try
1560 // duplicate the nodes that produces these "too expensive to copy"
1561 // values to break the dependency. In case even that doesn't work,
1562 // insert cross class copies.
1563 // If it's not too expensive, i.e. cost != -1, issue copies.
1564 SUnit *TrySU = Interferences[0];
1565 SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
1566 assert(LRegs.size() == 1 && "Can't handle this yet!");
1567 unsigned Reg = LRegs[0];
1568 SUnit *LRDef = LiveRegDefs[Reg];
1569 MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
1570 const TargetRegisterClass *RC =
1571 TRI->getMinimalPhysRegClass(Reg, VT);
1572 const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
1573
1574 // If cross copy register class is the same as RC, then it must be possible
1575 // to copy the value directly. Do not try to duplicate the def.
1576 // If cross copy register class is not the same as RC, then it's possible to
1577 // copy the value, but it requires cross register class copies and it is
1578 // expensive.
1579 // If cross copy register class is null, then it's not possible to copy
1580 // the value at all.
1581 SUnit *NewDef = nullptr;
1582 if (DestRC != RC) {
1583 NewDef = CopyAndMoveSuccessors(LRDef);
1584 if (!DestRC && !NewDef)
1585 report_fatal_error("Can't handle live physical register dependency!");
1586 }
1587 if (!NewDef) {
1588 // Issue copies, these can be expensive cross register class copies.
1589 SmallVector<SUnit*, 2> Copies;
1590 InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1591 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
1592 << " to SU #" << Copies.front()->NodeNum << "\n");
1593 AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
1594 NewDef = Copies.back();
1595 }
1596
1597 LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
1598 << " to SU #" << TrySU->NodeNum << "\n");
1599 LiveRegDefs[Reg] = NewDef;
1600 AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
1601 TrySU->isAvailable = false;
1602 CurSU = NewDef;
1603 }
1604 assert(CurSU && "Unable to resolve live physical register dependencies!");
1605 return CurSU;
1606}
1607
1608/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1609/// schedulers.
1610void ScheduleDAGRRList::ListScheduleBottomUp() {
1611 // Release any predecessors of the special Exit node.
1612 ReleasePredecessors(&ExitSU);
1613
1614 // Add root to Available queue.
1615 if (!SUnits.empty()) {
1616 SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1617 assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1618 RootSU->isAvailable = true;
1619 AvailableQueue->push(RootSU);
1620 }
1621
1622 // While Available queue is not empty, grab the node with the highest
1623 // priority. If it is not ready put it back. Schedule the node.
1624 Sequence.reserve(SUnits.size());
1625 while (!AvailableQueue->empty() || !Interferences.empty()) {
1626 LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
1627 AvailableQueue->dump(this));
1628
1629 // Pick the best node to schedule taking all constraints into
1630 // consideration.
1631 SUnit *SU = PickNodeToScheduleBottomUp();
1632
1633 AdvancePastStalls(SU);
1634
1635 ScheduleNodeBottomUp(SU);
1636
1637 while (AvailableQueue->empty() && !PendingQueue.empty()) {
1638 // Advance the cycle to free resources. Skip ahead to the next ready SU.
1639 assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
1640 "MinAvailableCycle uninitialized");
1641 AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1642 }
1643 }
1644
1645 // Reverse the order if it is bottom up.
1646 std::reverse(Sequence.begin(), Sequence.end());
1647
1648#ifndef NDEBUG
1649 VerifyScheduledSequence(/*isBottomUp=*/true);
1650#endif
1651}
1652
1653namespace {
1654
1655class RegReductionPQBase;
1656
1657struct queue_sort {
1658 bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1659};
1660
1661#ifndef NDEBUG
1662template<class SF>
1663struct reverse_sort : public queue_sort {
1664 SF &SortFunc;
1665
1666 reverse_sort(SF &sf) : SortFunc(sf) {}
1667
1668 bool operator()(SUnit* left, SUnit* right) const {
1669 // reverse left/right rather than simply !SortFunc(left, right)
1670 // to expose different paths in the comparison logic.
1671 return SortFunc(right, left);
1672 }
1673};
1674#endif // NDEBUG
1675
1676/// bu_ls_rr_sort - Priority function for bottom up register pressure
1677 /// reduction scheduler.
1678struct bu_ls_rr_sort : public queue_sort {
1679 enum {
1680 IsBottomUp = true,
1681 HasReadyFilter = false
1682 };
1683
1684 RegReductionPQBase *SPQ;
1685
1686 bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1687
1688 bool operator()(SUnit* left, SUnit* right) const;
1689};
1690
1691// src_ls_rr_sort - Priority function for source order scheduler.
1692struct src_ls_rr_sort : public queue_sort {
1693 enum {
1694 IsBottomUp = true,
1695 HasReadyFilter = false
1696 };
1697
1698 RegReductionPQBase *SPQ;
1699
1700 src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1701
1702 bool operator()(SUnit* left, SUnit* right) const;
1703};
1704
1705// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1706struct hybrid_ls_rr_sort : public queue_sort {
1707 enum {
1708 IsBottomUp = true,
1709 HasReadyFilter = false
1710 };
1711
1712 RegReductionPQBase *SPQ;
1713
1714 hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1715
1716 bool isReady(SUnit *SU, unsigned CurCycle) const;
1717
1718 bool operator()(SUnit* left, SUnit* right) const;
1719};
1720
1721// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1722// scheduler.
1723struct ilp_ls_rr_sort : public queue_sort {
1724 enum {
1725 IsBottomUp = true,
1726 HasReadyFilter = false
1727 };
1728
1729 RegReductionPQBase *SPQ;
1730
1731 ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1732
1733 bool isReady(SUnit *SU, unsigned CurCycle) const;
1734
1735 bool operator()(SUnit* left, SUnit* right) const;
1736};
1737
1738class RegReductionPQBase : public SchedulingPriorityQueue {
1739protected:
1740 std::vector<SUnit *> Queue;
1741 unsigned CurQueueId = 0;
1742 bool TracksRegPressure;
1743 bool SrcOrder;
1744
1745 // SUnits - The SUnits for the current graph.
1746 std::vector<SUnit> *SUnits = nullptr;
1747
1748 MachineFunction &MF;
1749 const TargetInstrInfo *TII = nullptr;
1750 const TargetRegisterInfo *TRI = nullptr;
1751 const TargetLowering *TLI = nullptr;
1752 ScheduleDAGRRList *scheduleDAG = nullptr;
1753
1754 // SethiUllmanNumbers - The SethiUllman number for each node.
1755 std::vector<unsigned> SethiUllmanNumbers;
1756
1757 /// RegPressure - Tracking current reg pressure per register class.
1758 std::vector<unsigned> RegPressure;
1759
1760 /// RegLimit - Tracking the number of allocatable registers per register
1761 /// class.
1762 std::vector<unsigned> RegLimit;
1763
1764public:
1765 RegReductionPQBase(MachineFunction &mf,
1766 bool hasReadyFilter,
1767 bool tracksrp,
1768 bool srcorder,
1769 const TargetInstrInfo *tii,
1770 const TargetRegisterInfo *tri,
1771 const TargetLowering *tli)
1772 : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
1773 SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
1774 if (TracksRegPressure) {
1775 unsigned NumRC = TRI->getNumRegClasses();
1776 RegLimit.resize(NumRC);
1777 RegPressure.resize(NumRC);
1778 std::fill(RegLimit.begin(), RegLimit.end(), 0);
1779 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1780 for (const TargetRegisterClass *RC : TRI->regclasses())
1781 RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
1782 }
1783 }
1784
1785 void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1786 scheduleDAG = scheduleDag;
1787 }
1788
1789 ScheduleHazardRecognizer* getHazardRec() {
1790 return scheduleDAG->getHazardRec();
1791 }
1792
1793 void initNodes(std::vector<SUnit> &sunits) override;
1794
1795 void addNode(const SUnit *SU) override;
1796
1797 void updateNode(const SUnit *SU) override;
1798
1799 void releaseState() override {
1800 SUnits = nullptr;
1801 SethiUllmanNumbers.clear();
1802 std::fill(RegPressure.begin(), RegPressure.end(), 0);
1803 }
1804
1805 unsigned getNodePriority(const SUnit *SU) const;
1806
1807 unsigned getNodeOrdering(const SUnit *SU) const {
1808 if (!SU->getNode()) return 0;
1809
1810 return SU->getNode()->getIROrder();
1811 }
1812
1813 bool empty() const override { return Queue.empty(); }
1814
1815 void push(SUnit *U) override {
1816 assert(!U->NodeQueueId && "Node in the queue already");
1817 U->NodeQueueId = ++CurQueueId;
1818 Queue.push_back(U);
1819 }
1820
1821 void remove(SUnit *SU) override {
1822 assert(!Queue.empty() && "Queue is empty!");
1823 assert(SU->NodeQueueId != 0 && "Not in queue!");
1824 std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
1825 if (I != std::prev(Queue.end()))
1826 std::swap(*I, Queue.back());
1827 Queue.pop_back();
1828 SU->NodeQueueId = 0;
1829 }
1830
1831 bool tracksRegPressure() const override { return TracksRegPressure; }
1832
1833 void dumpRegPressure() const;
1834
1835 bool HighRegPressure(const SUnit *SU) const;
1836
1837 bool MayReduceRegPressure(SUnit *SU) const;
1838
1839 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1840
1841 void scheduledNode(SUnit *SU) override;
1842
1843 void unscheduledNode(SUnit *SU) override;
1844
1845protected:
1846 bool canClobber(const SUnit *SU, const SUnit *Op);
1847 void AddPseudoTwoAddrDeps();
1848 void PrescheduleNodesWithMultipleUses();
1849 void CalculateSethiUllmanNumbers();
1850};
1851
1852template<class SF>
1853static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
1854 unsigned BestIdx = 0;
1855 // Only compute the cost for the first 1000 items in the queue, to avoid
1856 // excessive compile-times for very large queues.
1857 for (unsigned I = 1, E = std::min(Q.size(), (decltype(Q.size()))1000); I != E;
1858 I++)
1859 if (Picker(Q[BestIdx], Q[I]))
1860 BestIdx = I;
1861 SUnit *V = Q[BestIdx];
1862 if (BestIdx + 1 != Q.size())
1863 std::swap(Q[BestIdx], Q.back());
1864 Q.pop_back();
1865 return V;
1866}
1867
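// Note on the data structure above: the "queue" is an unordered vector that is
// scanned linearly and compacted with a swap-with-back + pop_back, rather than
// a heap. Because the priority functions used here can depend on mutable
// scheduler state (e.g. current register pressure), a heap invariant would be
// hard to keep valid between pops. A minimal sketch of the same
// pick-and-compact idea, specialized to ints with a hypothetical comparator
// (illustrative only, not part of this file):
#if 0
static int popBestInt(std::vector<int> &Q, bool (*PreferSecond)(int, int)) {
  unsigned BestIdx = 0;
  for (unsigned I = 1, E = Q.size(); I != E; ++I)
    if (PreferSecond(Q[BestIdx], Q[I])) // true => Q[I] is the better pick
      BestIdx = I;
  int V = Q[BestIdx];
  std::swap(Q[BestIdx], Q.back()); // O(1) removal from the middle
  Q.pop_back();
  return V;
}
#endif
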
1868template<class SF>
1869SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
1870#ifndef NDEBUG
1871 if (DAG->StressSched) {
1872 reverse_sort<SF> RPicker(Picker);
1873 return popFromQueueImpl(Q, RPicker);
1874 }
1875#endif
1876 (void)DAG;
1877 return popFromQueueImpl(Q, Picker);
1878}
1879
1880//===----------------------------------------------------------------------===//
1881// RegReductionPriorityQueue Definition
1882//===----------------------------------------------------------------------===//
1883//
1884// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
1885// to reduce register pressure.
1886//
1887template<class SF>
1888class RegReductionPriorityQueue : public RegReductionPQBase {
1889 SF Picker;
1890
1891public:
1892 RegReductionPriorityQueue(MachineFunction &mf,
1893 bool tracksrp,
1894 bool srcorder,
1895 const TargetInstrInfo *tii,
1896 const TargetRegisterInfo *tri,
1897 const TargetLowering *tli)
1898 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1899 tii, tri, tli),
1900 Picker(this) {}
1901
1902 bool isBottomUp() const override { return SF::IsBottomUp; }
1903
1904 bool isReady(SUnit *U) const override {
1905 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1906 }
1907
1908 SUnit *pop() override {
1909 if (Queue.empty()) return nullptr;
1910
1911 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1912 V->NodeQueueId = 0;
1913 return V;
1914 }
1915
1916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1917 LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
1918 // Emulate pop() without clobbering NodeQueueIds.
1919 std::vector<SUnit *> DumpQueue = Queue;
1920 SF DumpPicker = Picker;
1921 while (!DumpQueue.empty()) {
1922 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1923 dbgs() << "Height " << SU->getHeight() << ": ";
1924 DAG->dumpNode(*SU);
1925 }
1926 }
1927#endif
1928};
1929
1930using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
1931using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
1932using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
1933using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
1934
1935} // end anonymous namespace
1936
1937//===----------------------------------------------------------------------===//
1938// Static Node Priority for Register Pressure Reduction
1939//===----------------------------------------------------------------------===//
1940
1941// Check for special nodes that bypass scheduling heuristics.
1942// Currently this pushes TokenFactor nodes down, but may be used for other
1943// pseudo-ops as well.
1944//
1945// Return -1 to schedule right above left, 1 for left above right.
1946// Return 0 if no bias exists.
1947static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1948 bool LSchedLow = left->isScheduleLow;
1949 bool RSchedLow = right->isScheduleLow;
1950 if (LSchedLow != RSchedLow)
1951 return LSchedLow < RSchedLow ? 1 : -1;
1952 return 0;
1953}
1954
1955/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1956/// Smaller number is the higher priority.
1957static unsigned
1958CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1959 if (SUNumbers[SU->NodeNum] != 0)
1960 return SUNumbers[SU->NodeNum];
1961
1962 // Use WorkList to avoid stack overflow on excessively large IRs.
1963 struct WorkState {
1964 WorkState(const SUnit *SU) : SU(SU) {}
1965 const SUnit *SU;
1966 unsigned PredsProcessed = 0;
1967 };
1968
1969 SmallVector<WorkState, 16> WorkList;
1970 WorkList.push_back(SU);
1971 while (!WorkList.empty()) {
1972 auto &Temp = WorkList.back();
1973 auto *TempSU = Temp.SU;
1974 bool AllPredsKnown = true;
1975 // Try to find a non-evaluated pred and push it into the processing stack.
1976 for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
1977 auto &Pred = TempSU->Preds[P];
1978 if (Pred.isCtrl()) continue; // ignore chain preds
1979 SUnit *PredSU = Pred.getSUnit();
1980 if (SUNumbers[PredSU->NodeNum] == 0) {
1981#ifndef NDEBUG
1982 // In debug mode, check that we don't have such element in the stack.
1983 for (auto It : WorkList)
1984 assert(It.SU != PredSU && "Trying to push an element twice?");
1985#endif
1986 // Next time start processing this one starting from the next pred.
1987 Temp.PredsProcessed = P + 1;
1988 WorkList.push_back(PredSU);
1989 AllPredsKnown = false;
1990 break;
1991 }
1992 }
1993
1994 if (!AllPredsKnown)
1995 continue;
1996
1997 // Once all preds are known, we can calculate the answer for this one.
1998 unsigned SethiUllmanNumber = 0;
1999 unsigned Extra = 0;
2000 for (const SDep &Pred : TempSU->Preds) {
2001 if (Pred.isCtrl()) continue; // ignore chain preds
2002 SUnit *PredSU = Pred.getSUnit();
2003 unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
2004 assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
2005 if (PredSethiUllman > SethiUllmanNumber) {
2006 SethiUllmanNumber = PredSethiUllman;
2007 Extra = 0;
2008 } else if (PredSethiUllman == SethiUllmanNumber)
2009 ++Extra;
2010 }
2011
2012 SethiUllmanNumber += Extra;
2013 if (SethiUllmanNumber == 0)
2014 SethiUllmanNumber = 1;
2015 SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
2016 WorkList.pop_back();
2017 }
2018
2019 assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
2020 return SUNumbers[SU->NodeNum];
2021}
2022
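// For intuition, the classic Sethi-Ullman labeling on a plain expression
// *tree* is sketched below (illustrative only, not part of this file). The
// routine above generalizes it to the scheduling DAG: it walks SUnit
// predecessors iteratively with an explicit work list and adds one for every
// additional predecessor that ties the current maximum, instead of handling
// only the binary equal-children case.
#if 0
struct ToyNode {
  const ToyNode *Left = nullptr;  // nullptr for a leaf operand
  const ToyNode *Right = nullptr;
};

static unsigned toySethiUllman(const ToyNode *N) {
  if (!N->Left && !N->Right)
    return 1; // a leaf needs a single register
  unsigned L = N->Left ? toySethiUllman(N->Left) : 0;
  unsigned R = N->Right ? toySethiUllman(N->Right) : 0;
  // Evaluate the subtree that needs more registers first; when both need the
  // same amount, one extra register is required to hold the first result.
  return L == R ? L + 1 : std::max(L, R);
}
#endif
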
2023/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
2024/// scheduling units.
2025void RegReductionPQBase::CalculateSethiUllmanNumbers() {
2026 SethiUllmanNumbers.assign(SUnits->size(), 0);
2027
2028 for (const SUnit &SU : *SUnits)
2029 CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
2030}
2031
2032void RegReductionPQBase::addNode(const SUnit *SU) {
2033 unsigned SUSize = SethiUllmanNumbers.size();
2034 if (SUnits->size() > SUSize)
2035 SethiUllmanNumbers.resize(SUSize*2, 0);
2036 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2037}
2038
2039void RegReductionPQBase::updateNode(const SUnit *SU) {
2040 SethiUllmanNumbers[SU->NodeNum] = 0;
2041 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
2042}
2043
2044// Lower priority means schedule further down. For bottom-up scheduling, lower
2045// priority SUs are scheduled before higher priority SUs.
2046unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
2047 assert(SU->NodeNum < SethiUllmanNumbers.size());
2048 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2049 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2050 // CopyToReg should be close to its uses to facilitate coalescing and
2051 // avoid spilling.
2052 return 0;
2053 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2054 Opc == TargetOpcode::SUBREG_TO_REG ||
2055 Opc == TargetOpcode::INSERT_SUBREG)
2056 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2057 // close to their uses to facilitate coalescing.
2058 return 0;
2059 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
2060 // If SU does not have a register use, i.e. it doesn't produce a value
2061 // that would be consumed (e.g. store), then it terminates a chain of
2062 // computation. Give it a large SethiUllman number so it will be
2063 // scheduled right before its predecessors so that it doesn't lengthen
2064 // their live ranges.
2065 return 0xffff;
2066 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2067 // If SU does not have a register def, schedule it close to its uses
2068 // because it does not lengthen any live ranges.
2069 return 0;
2070#if 1
2071 return SethiUllmanNumbers[SU->NodeNum];
2072#else
2073 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
2074 if (SU->isCallOp) {
2075 // FIXME: This assumes all of the defs are used as call operands.
2076 int NP = (int)Priority - SU->getNode()->getNumValues();
2077 return (NP > 0) ? NP : 0;
2078 }
2079 return Priority;
2080#endif
2081}
2082
2083//===----------------------------------------------------------------------===//
2084// Register Pressure Tracking
2085//===----------------------------------------------------------------------===//
2086
2087#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2088LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
2089 for (const TargetRegisterClass *RC : TRI->regclasses()) {
2090 unsigned Id = RC->getID();
2091 unsigned RP = RegPressure[Id];
2092 if (!RP) continue;
2093 LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
2094 << RegLimit[Id] << '\n');
2095 }
2096}
2097#endif
2098
2099bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
2100 if (!TLI)
2101 return false;
2102
2103 for (const SDep &Pred : SU->Preds) {
2104 if (Pred.isCtrl())
2105 continue;
2106 SUnit *PredSU = Pred.getSUnit();
2107 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2108 // to cover the number of registers defined (they are all live).
2109 if (PredSU->NumRegDefsLeft == 0) {
2110 continue;
2111 }
2112 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2113 RegDefPos.IsValid(); RegDefPos.Advance()) {
2114 unsigned RCId, Cost;
2115 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2116
2117 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
2118 return true;
2119 }
2120 }
2121 return false;
2122}
2123
2124bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
2125 const SDNode *N = SU->getNode();
2126
2127 if (!N->isMachineOpcode() || !SU->NumSuccs)
2128 return false;
2129
2130 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2131 for (unsigned i = 0; i != NumDefs; ++i) {
2132 MVT VT = N->getSimpleValueType(i);
2133 if (!N->hasAnyUseOfValue(i))
2134 continue;
2135 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2136 if (RegPressure[RCId] >= RegLimit[RCId])
2137 return true;
2138 }
2139 return false;
2140}
2141
2142 // Compute the register pressure contribution by this instruction by counting
2143 // up for uses that are not live and down for defs. Only count register classes
2144// that are already under high pressure. As a side effect, compute the number of
2145// uses of registers that are already live.
2146//
2147// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
2148// so could probably be factored.
2149int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
2150 LiveUses = 0;
2151 int PDiff = 0;
2152 for (const SDep &Pred : SU->Preds) {
2153 if (Pred.isCtrl())
2154 continue;
2155 SUnit *PredSU = Pred.getSUnit();
2156 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2157 // to cover the number of registers defined (they are all live).
2158 if (PredSU->NumRegDefsLeft == 0) {
2159 if (PredSU->getNode()->isMachineOpcode())
2160 ++LiveUses;
2161 continue;
2162 }
2163 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2164 RegDefPos.IsValid(); RegDefPos.Advance()) {
2165 MVT VT = RegDefPos.GetValue();
2166 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2167 if (RegPressure[RCId] >= RegLimit[RCId])
2168 ++PDiff;
2169 }
2170 }
2171 const SDNode *N = SU->getNode();
2172
2173 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2174 return PDiff;
2175
2176 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2177 for (unsigned i = 0; i != NumDefs; ++i) {
2178 MVT VT = N->getSimpleValueType(i);
2179 if (!N->hasAnyUseOfValue(i))
2180 continue;
2181 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2182 if (RegPressure[RCId] >= RegLimit[RCId])
2183 --PDiff;
2184 }
2185 return PDiff;
2186}
2187
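// Worked example (illustrative): suppose SU has two data predecessors whose
// defs are not yet live (NumRegDefsLeft != 0) and belong to classes already at
// their pressure limit, and SU itself defines one used value in a class that
// is also at its limit. Then PDiff = +2 (not-yet-live uses) - 1 (own def) = +1
// and LiveUses stays 0, i.e. scheduling SU at this point would, on net, make
// one more value live in an over-pressured register class.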
2188void RegReductionPQBase::scheduledNode(SUnit *SU) {
2189 if (!TracksRegPressure)
2190 return;
2191
2192 if (!SU->getNode())
2193 return;
2194
2195 for (const SDep &Pred : SU->Preds) {
2196 if (Pred.isCtrl())
2197 continue;
2198 SUnit *PredSU = Pred.getSUnit();
2199 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2200 // to cover the number of registers defined (they are all live).
2201 if (PredSU->NumRegDefsLeft == 0) {
2202 continue;
2203 }
2204 // FIXME: The ScheduleDAG currently loses information about which of a
2205 // node's values is consumed by each dependence. Consequently, if the node
2206 // defines multiple register classes, we don't know which to pressurize
2207 // here. Instead the following loop consumes the register defs in an
2208 // arbitrary order. At least it handles the common case of clustered loads
2209 // to the same class. For precise liveness, each SDep needs to indicate the
2210 // result number. But that tightly couples the ScheduleDAG with the
2211 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2212 // value type or register class to SDep.
2213 //
2214 // The most important aspect of register tracking is balancing the increase
2215 // here with the reduction further below. Note that this SU may use multiple
2216 // defs in PredSU. They can't be determined here, but we've already
2217 // compensated by reducing NumRegDefsLeft in PredSU during
2218 // ScheduleDAGSDNodes::AddSchedEdges.
2219 --PredSU->NumRegDefsLeft;
2220 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2221 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2222 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2223 if (SkipRegDefs)
2224 continue;
2225
2226 unsigned RCId, Cost;
2227 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2228 RegPressure[RCId] += Cost;
2229 break;
2230 }
2231 }
2232
2233 // We should have this assert, but there may be dead SDNodes that never
2234 // materialize as SUnits, so they don't appear to generate liveness.
2235 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2236 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2237 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2238 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2239 if (SkipRegDefs > 0)
2240 continue;
2241 unsigned RCId, Cost;
2242 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2243 if (RegPressure[RCId] < Cost) {
2244 // Register pressure tracking is imprecise. This can happen. But we try
2245 // hard not to let it happen because it likely results in poor scheduling.
2246 LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum
2247 << ") has too many regdefs\n");
2248 RegPressure[RCId] = 0;
2249 }
2250 else {
2251 RegPressure[RCId] -= Cost;
2252 }
2253 }
2254 LLVM_DEBUG(dumpRegPressure());
2255}
2256
2257void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2258 if (!TracksRegPressure)
2259 return;
2260
2261 const SDNode *N = SU->getNode();
2262 if (!N) return;
2263
2264 if (!N->isMachineOpcode()) {
2265 if (N->getOpcode() != ISD::CopyToReg)
2266 return;
2267 } else {
2268 unsigned Opc = N->getMachineOpcode();
2269 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2270 Opc == TargetOpcode::INSERT_SUBREG ||
2271 Opc == TargetOpcode::SUBREG_TO_REG ||
2272 Opc == TargetOpcode::REG_SEQUENCE ||
2273 Opc == TargetOpcode::IMPLICIT_DEF)
2274 return;
2275 }
2276
2277 for (const SDep &Pred : SU->Preds) {
2278 if (Pred.isCtrl())
2279 continue;
2280 SUnit *PredSU = Pred.getSUnit();
2281 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2282 // counts data deps.
2283 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2284 continue;
2285 const SDNode *PN = PredSU->getNode();
2286 if (!PN->isMachineOpcode()) {
2287 if (PN->getOpcode() == ISD::CopyFromReg) {
2288 MVT VT = PN->getSimpleValueType(0);
2289 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2290 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2291 }
2292 continue;
2293 }
2294 unsigned POpc = PN->getMachineOpcode();
2295 if (POpc == TargetOpcode::IMPLICIT_DEF)
2296 continue;
2297 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2298 POpc == TargetOpcode::INSERT_SUBREG ||
2299 POpc == TargetOpcode::SUBREG_TO_REG) {
2300 MVT VT = PN->getSimpleValueType(0);
2301 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2302 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2303 continue;
2304 }
2305 if (POpc == TargetOpcode::REG_SEQUENCE) {
2306 unsigned DstRCIdx =
2307 cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue();
2308 const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
2309 unsigned RCId = RC->getID();
2310 // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used
2311 // here. Instead use the same constant as in GetCostForDef.
2312 RegPressure[RCId] += RegSequenceCost;
2313 continue;
2314 }
2315 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2316 for (unsigned i = 0; i != NumDefs; ++i) {
2317 MVT VT = PN->getSimpleValueType(i);
2318 if (!PN->hasAnyUseOfValue(i))
2319 continue;
2320 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2321 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2322 // Register pressure tracking is imprecise. This can happen.
2323 RegPressure[RCId] = 0;
2324 else
2325 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2326 }
2327 }
2328
2329 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2330 // may transfer data dependencies to CopyToReg.
2331 if (SU->NumSuccs && N->isMachineOpcode()) {
2332 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2333 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2334 MVT VT = N->getSimpleValueType(i);
2335 if (VT == MVT::Glue || VT == MVT::Other)
2336 continue;
2337 if (!N->hasAnyUseOfValue(i))
2338 continue;
2339 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2340 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2341 }
2342 }
2343
2344 LLVM_DEBUG(dumpRegPressure());
2345}
2346
2347//===----------------------------------------------------------------------===//
2348// Dynamic Node Priority for Register Pressure Reduction
2349//===----------------------------------------------------------------------===//
2350
2351/// closestSucc - Returns the scheduled cycle of the successor which is
2352/// closest to the current cycle.
2353static unsigned closestSucc(const SUnit *SU) {
2354 unsigned MaxHeight = 0;
2355 for (const SDep &Succ : SU->Succs) {
2356 if (Succ.isCtrl()) continue; // ignore chain succs
2357 unsigned Height = Succ.getSUnit()->getHeight();
2358 // If there are a bunch of CopyToRegs stacked up, they should be considered
2359 // to be at the same position.
2360 if (Succ.getSUnit()->getNode() &&
2361 Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2362 Height = closestSucc(Succ.getSUnit())+1;
2363 if (Height > MaxHeight)
2364 MaxHeight = Height;
2365 }
2366 return MaxHeight;
2367}
2368
2369 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2370/// for scratch registers, i.e. number of data dependencies.
2371static unsigned calcMaxScratches(const SUnit *SU) {
2372 unsigned Scratches = 0;
2373 for (const SDep &Pred : SU->Preds) {
2374 if (Pred.isCtrl()) continue; // ignore chain preds
2375 Scratches++;
2376 }
2377 return Scratches;
2378}
2379
2380/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2381/// CopyFromReg from a virtual register.
2382static bool hasOnlyLiveInOpers(const SUnit *SU) {
2383 bool RetVal = false;
2384 for (const SDep &Pred : SU->Preds) {
2385 if (Pred.isCtrl()) continue;
2386 const SUnit *PredSU = Pred.getSUnit();
2387 if (PredSU->getNode() &&
2388 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2389 Register Reg =
2390 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2391 if (Reg.isVirtual()) {
2392 RetVal = true;
2393 continue;
2394 }
2395 }
2396 return false;
2397 }
2398 return RetVal;
2399}
2400
2401/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2402/// CopyToReg to a virtual register. This SU def is probably a liveout and
2403/// it has no other use. It should be scheduled closer to the terminator.
2404static bool hasOnlyLiveOutUses(const SUnit *SU) {
2405 bool RetVal = false;
2406 for (const SDep &Succ : SU->Succs) {
2407 if (Succ.isCtrl()) continue;
2408 const SUnit *SuccSU = Succ.getSUnit();
2409 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2410 Register Reg =
2411 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2412 if (Reg.isVirtual()) {
2413 RetVal = true;
2414 continue;
2415 }
2416 }
2417 return false;
2418 }
2419 return RetVal;
2420}
2421
2422// Set isVRegCycle for a node with only live in opers and live out uses. Also
2423// set isVRegCycle for its CopyFromReg operands.
2424//
2425// This is only relevant for single-block loops, in which case the VRegCycle
2426// node is likely an induction variable in which the operand and target virtual
2427// registers should be coalesced (e.g. pre/post increment values). Setting the
2428// isVRegCycle flag helps the scheduler prioritize other uses of the same
2429// CopyFromReg so that this node becomes the virtual register "kill". This
2430// avoids interference between the values live in and out of the block and
2431// eliminates a copy inside the loop.
2432static void initVRegCycle(SUnit *SU) {
2433 if (DisableSchedVRegCycle)
2434 return;
2435
2436 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2437 return;
2438
2439 LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2440
2441 SU->isVRegCycle = true;
2442
2443 for (const SDep &Pred : SU->Preds) {
2444 if (Pred.isCtrl()) continue;
2445 Pred.getSUnit()->isVRegCycle = true;
2446 }
2447}
2448
2449// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2450// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2451static void resetVRegCycle(SUnit *SU) {
2452 if (!SU->isVRegCycle)
2453 return;
2454
2455 for (const SDep &Pred : SU->Preds) {
2456 if (Pred.isCtrl()) continue; // ignore chain preds
2457 SUnit *PredSU = Pred.getSUnit();
2458 if (PredSU->isVRegCycle) {
2459 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2460 "VRegCycle def must be CopyFromReg");
2461 Pred.getSUnit()->isVRegCycle = false;
2462 }
2463 }
2464}
2465
2466// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2467// means a node that defines the VRegCycle has not been scheduled yet.
2468static bool hasVRegCycleUse(const SUnit *SU) {
2469 // If this SU also defines the VReg, don't hoist it as a "use".
2470 if (SU->isVRegCycle)
2471 return false;
2472
2473 for (const SDep &Pred : SU->Preds) {
2474 if (Pred.isCtrl()) continue; // ignore chain preds
2475 if (Pred.getSUnit()->isVRegCycle &&
2476 Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2477 LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
2478 return true;
2479 }
2480 }
2481 return false;
2482}
2483
2484// Check for either a dependence (latency) or resource (hazard) stall.
2485//
2486// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2487static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2488 if ((int)SPQ->getCurCycle() < Height) return true;
2489 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2490 != ScheduleHazardRecognizer::NoHazard)
2491 return true;
2492 return false;
2493}
2494
2495// Return -1 if left has higher priority, 1 if right has higher priority.
2496// Return 0 if latency-based priority is equivalent.
2497static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2498 RegReductionPQBase *SPQ) {
2499 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2500 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2501 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2502 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2503 int LHeight = (int)left->getHeight() + LPenalty;
2504 int RHeight = (int)right->getHeight() + RPenalty;
2505
2506 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2507 BUHasStall(left, LHeight, SPQ);
2508 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2509 BUHasStall(right, RHeight, SPQ);
2510
2511 // If scheduling one of the nodes will cause a pipeline stall, delay it.
2512 // If scheduling both nodes would cause pipeline stalls, sort them
2513 // according to their height.
2514 if (LStall) {
2515 if (!RStall)
2516 return 1;
2517 if (LHeight != RHeight)
2518 return LHeight > RHeight ? 1 : -1;
2519 } else if (RStall)
2520 return -1;
2521
2522 // If either node is scheduling for latency, sort them by height/depth
2523 // and latency.
2524 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2525 right->SchedulingPref == Sched::ILP)) {
2526 // If neither instruction stalls (!LStall && !RStall) and the HazardRecognizer
2527 // is enabled, grouping instructions by cycle, then height is already
2528 // accounted for and only depth matters. We also reach this point if both
2529 // stall but have the same height.
2530 if (!SPQ->getHazardRec()->isEnabled()) {
2531 if (LHeight != RHeight)
2532 return LHeight > RHeight ? 1 : -1;
2533 }
2534 int LDepth = left->getDepth() - LPenalty;
2535 int RDepth = right->getDepth() - RPenalty;
2536 if (LDepth != RDepth) {
2537 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
2538 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2539 << ") depth " << RDepth << "\n");
2540 return LDepth < RDepth ? 1 : -1;
2541 }
2542 if (left->Latency != right->Latency)
2543 return left->Latency > right->Latency ? 1 : -1;
2544 }
2545 return 0;
2546}
2547
2548static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2549 // Schedule physical register definitions close to their use. This is
2550 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2551 // long as shortening physreg live ranges is generally good, we can defer
2552 // creating a subtarget hook.
2553 if (!DisableSchedPhysRegJoin) {
2554 bool LHasPhysReg = left->hasPhysRegDefs;
2555 bool RHasPhysReg = right->hasPhysRegDefs;
2556 if (LHasPhysReg != RHasPhysReg) {
2557 #ifndef NDEBUG
2558 static const char *const PhysRegMsg[] = { " has no physreg",
2559 " defines a physreg" };
2560 #endif
2561 LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2562 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
2563 << ") " << PhysRegMsg[RHasPhysReg] << "\n");
2564 return LHasPhysReg < RHasPhysReg;
2565 }
2566 }
2567
2568 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2569 unsigned LPriority = SPQ->getNodePriority(left);
2570 unsigned RPriority = SPQ->getNodePriority(right);
2571
2572 // Be really careful about hoisting call operands above previous calls.
2573 // Only allow it if it would reduce register pressure.
2574 if (left->isCall && right->isCallOp) {
2575 unsigned RNumVals = right->getNode()->getNumValues();
2576 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2577 }
2578 if (right->isCall && left->isCallOp) {
2579 unsigned LNumVals = left->getNode()->getNumValues();
2580 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2581 }
2582
2583 if (LPriority != RPriority)
2584 return LPriority > RPriority;
2585
2586 // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2587 // the same, then keep source order.
2588 if (left->isCall || right->isCall) {
2589 unsigned LOrder = SPQ->getNodeOrdering(left);
2590 unsigned ROrder = SPQ->getNodeOrdering(right);
2591
2592 // Prefer an ordering where the lower the non-zero order number, the higher
2593 // the preference.
2594 if ((LOrder || ROrder) && LOrder != ROrder)
2595 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2596 }
2597
2598 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
2599 // e.g.
2600 // t1 = op t2, c1
2601 // t3 = op t4, c2
2602 //
2603 // and the following instructions are both ready.
2604 // t2 = op c3
2605 // t4 = op c4
2606 //
2607 // Then schedule t2 = op first.
2608 // i.e.
2609 // t4 = op c4
2610 // t2 = op c3
2611 // t1 = op t2, c1
2612 // t3 = op t4, c2
2613 //
2614 // This creates more short live intervals.
2615 unsigned LDist = closestSucc(left);
2616 unsigned RDist = closestSucc(right);
2617 if (LDist != RDist)
2618 return LDist < RDist;
2619
2620 // How many registers become live when the node is scheduled.
2621 unsigned LScratch = calcMaxScratches(left);
2622 unsigned RScratch = calcMaxScratches(right);
2623 if (LScratch != RScratch)
2624 return LScratch > RScratch;
2625
2626 // Comparing latency against a call makes little sense unless the node
2627 // is register pressure-neutral.
2628 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2629 return (left->NodeQueueId > right->NodeQueueId);
2630
2631 // Do not compare latencies when one or both of the nodes are calls.
2632 if (!DisableSchedCycles &&
2633 !(left->isCall || right->isCall)) {
2634 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2635 if (result != 0)
2636 return result > 0;
2637 }
2638 else {
2639 if (left->getHeight() != right->getHeight())
2640 return left->getHeight() > right->getHeight();
2641
2642 if (left->getDepth() != right->getDepth())
2643 return left->getDepth() < right->getDepth();
2644 }
2645
2646 assert(left->NodeQueueId && right->NodeQueueId &&
2647 "NodeQueueId cannot be zero");
2648 return (left->NodeQueueId > right->NodeQueueId);
2649}
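
// In summary, BURRSort breaks ties in this order: physical register defs
// (unless the physreg-join heuristic is disabled), Sethi-Ullman priority
// (adjusted for call operands), source order when calls are involved, distance
// to the closest successor, scratch (data-dependence) count, latency (or raw
// height/depth when cycle-level comparison is disabled or a call is involved),
// and finally the node queue id.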
2650
2651// Bottom up
2652bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2653 if (int res = checkSpecialNodes(left, right))
2654 return res > 0;
2655
2656 return BURRSort(left, right, SPQ);
2657}
2658
2659// Source order, otherwise bottom up.
2660bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2661 if (int res = checkSpecialNodes(left, right))
2662 return res > 0;
2663
2664 unsigned LOrder = SPQ->getNodeOrdering(left);
2665 unsigned ROrder = SPQ->getNodeOrdering(right);
2666
2667 // Prefer an ordering where the lower the non-zero order number, the higher
2668 // the preference.
2669 if ((LOrder || ROrder) && LOrder != ROrder)
2670 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2671
2672 return BURRSort(left, right, SPQ);
2673}
2674
2675// If the time between now and when the instruction will be ready can cover
2676// the spill code, then avoid adding it to the ready queue. This gives long
2677// stalls highest priority and allows hoisting across calls. It should also
2678// speed up processing the available queue.
2679bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2680 static const unsigned ReadyDelay = 3;
2681
2682 if (SPQ->MayReduceRegPressure(SU)) return true;
2683
2684 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2685
2686 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2687 != ScheduleHazardRecognizer::NoHazard)
2688 return false;
2689
2690 return true;
2691}
2692
2693// Return true if right should be scheduled with higher priority than left.
2694bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2695 if (int res = checkSpecialNodes(left, right))
2696 return res > 0;
2697
2698 if (left->isCall || right->isCall)
2699 // No way to compute latency of calls.
2700 return BURRSort(left, right, SPQ);
2701
2702 bool LHigh = SPQ->HighRegPressure(left);
2703 bool RHigh = SPQ->HighRegPressure(right);
2704 // Avoid causing spills. If register pressure is high, schedule for
2705 // register pressure reduction.
2706 if (LHigh && !RHigh) {
2707 LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
2708 << right->NodeNum << ")\n");
2709 return true;
2710 }
2711 else if (!LHigh && RHigh) {
2712 LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
2713 << left->NodeNum << ")\n");
2714 return false;
2715 }
2716 if (!LHigh && !RHigh) {
2717 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2718 if (result != 0)
2719 return result > 0;
2720 }
2721 return BURRSort(left, right, SPQ);
2722}
2723
2724// Schedule as many instructions in each cycle as possible. So don't make an
2725// instruction available unless it is ready in the current cycle.
2726bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2727 if (SU->getHeight() > CurCycle) return false;
2728
2729 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2730 != ScheduleHazardRecognizer::NoHazard)
2731 return false;
2732
2733 return true;
2734}
2735
2736static bool canEnableCoalescing(SUnit *SU) {
2737 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2738 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2739 // CopyToReg should be close to its uses to facilitate coalescing and
2740 // avoid spilling.
2741 return true;
2742
2743 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2744 Opc == TargetOpcode::SUBREG_TO_REG ||
2745 Opc == TargetOpcode::INSERT_SUBREG)
2746 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2747 // close to their uses to facilitate coalescing.
2748 return true;
2749
2750 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2751 // If SU does not have a register def, schedule it close to its uses
2752 // because it does not lengthen any live ranges.
2753 return true;
2754
2755 return false;
2756}
2757
2758// list-ilp is currently an experimental scheduler that allows various
2759// heuristics to be enabled prior to the normal register reduction logic.
2760bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2761 if (int res = checkSpecialNodes(left, right))
2762 return res > 0;
2763
2764 if (left->isCall || right->isCall)
2765 // No way to compute latency of calls.
2766 return BURRSort(left, right, SPQ);
2767
2768 unsigned LLiveUses = 0, RLiveUses = 0;
2769 int LPDiff = 0, RPDiff = 0;
2770 if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2771 LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2772 RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2773 }
2774 if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2775 LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
2776 << "): " << LPDiff << " != SU(" << right->NodeNum
2777 << "): " << RPDiff << "\n");
2778 return LPDiff > RPDiff;
2779 }
2780
2781 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2782 bool LReduce = canEnableCoalescing(left);
2783 bool RReduce = canEnableCoalescing(right);
2784 if (LReduce && !RReduce) return false;
2785 if (RReduce && !LReduce) return true;
2786 }
2787
2788 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2789 LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2790 << " != SU(" << right->NodeNum << "): " << RLiveUses
2791 << "\n");
2792 return LLiveUses < RLiveUses;
2793 }
2794
2795 if (!DisableSchedStalls) {
2796 bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2797 bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2798 if (LStall != RStall)
2799 return left->getHeight() > right->getHeight();
2800 }
2801
2802 if (!DisableSchedCriticalPath) {
2803 int spread = (int)left->getDepth() - (int)right->getDepth();
2804 if (std::abs(spread) > MaxReorderWindow) {
2805 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2806 << left->getDepth() << " != SU(" << right->NodeNum
2807 << "): " << right->getDepth() << "\n");
2808 return left->getDepth() < right->getDepth();
2809 }
2810 }
2811
2812 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2813 int spread = (int)left->getHeight() - (int)right->getHeight();
2814 if (std::abs(spread) > MaxReorderWindow)
2815 return left->getHeight() > right->getHeight();
2816 }
2817
2818 return BURRSort(left, right, SPQ);
2819}
2820
2821void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2822 SUnits = &sunits;
2823 // Add pseudo dependency edges for two-address nodes.
2824 if (!Disable2AddrHack)
2825 AddPseudoTwoAddrDeps();
2826 // Reroute edges to nodes with multiple uses.
2827 if (!TracksRegPressure && !SrcOrder)
2828 PrescheduleNodesWithMultipleUses();
2829 // Calculate node priorities.
2830 CalculateSethiUllmanNumbers();
2831
2832 // For single block loops, mark nodes that look like canonical IV increments.
2833 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
2834 for (SUnit &SU : sunits)
2835 initVRegCycle(&SU);
2836}
2837
2838//===----------------------------------------------------------------------===//
2839// Preschedule for Register Pressure
2840//===----------------------------------------------------------------------===//
2841
2842bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2843 if (SU->isTwoAddress) {
2844 unsigned Opc = SU->getNode()->getMachineOpcode();
2845 const MCInstrDesc &MCID = TII->get(Opc);
2846 unsigned NumRes = MCID.getNumDefs();
2847 unsigned NumOps = MCID.getNumOperands() - NumRes;
2848 for (unsigned i = 0; i != NumOps; ++i) {
2849 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2850 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2851 if (DU->getNodeId() != -1 &&
2852 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2853 return true;
2854 }
2855 }
2856 }
2857 return false;
2858}
2859
2860 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2861/// successor's explicit physregs whose definition can reach DepSU.
2862/// i.e. DepSU should not be scheduled above SU.
2863static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2864 ScheduleDAGRRList *scheduleDAG,
2865 const TargetInstrInfo *TII,
2866 const TargetRegisterInfo *TRI) {
2867 ArrayRef<MCPhysReg> ImpDefs =
2868 TII->get(SU->getNode()->getMachineOpcode()).implicit_defs();
2869 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2870 if (ImpDefs.empty() && !RegMask)
2871 return false;
2872
2873 for (const SDep &Succ : SU->Succs) {
2874 SUnit *SuccSU = Succ.getSUnit();
2875 for (const SDep &SuccPred : SuccSU->Preds) {
2876 if (!SuccPred.isAssignedRegDep())
2877 continue;
2878
2879 if (RegMask &&
2880 MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
2881 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2882 return true;
2883
2884 for (MCPhysReg ImpDef : ImpDefs) {
2885 // Return true if SU clobbers this physical register use and the
2886 // definition of the register reaches from DepSU. IsReachable queries
2887 // a topological forward sort of the DAG (following the successors).
2888 if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) &&
2889 scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
2890 return true;
2891 }
2892 }
2893 }
2894 return false;
2895}
2896
2897/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2898/// physical register defs.
2899static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2900 const TargetInstrInfo *TII,
2901 const TargetRegisterInfo *TRI) {
2902 SDNode *N = SuccSU->getNode();
2903 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2904 ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs();
2905 assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs");
2906 for (const SDNode *SUNode = SU->getNode(); SUNode;
2907 SUNode = SUNode->getGluedNode()) {
2908 if (!SUNode->isMachineOpcode())
2909 continue;
2910 ArrayRef<MCPhysReg> SUImpDefs =
2911 TII->get(SUNode->getMachineOpcode()).implicit_defs();
2912 const uint32_t *SURegMask = getNodeRegMask(SUNode);
2913 if (SUImpDefs.empty() && !SURegMask)
2914 continue;
2915 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2916 MVT VT = N->getSimpleValueType(i);
2917 if (VT == MVT::Glue || VT == MVT::Other)
2918 continue;
2919 if (!N->hasAnyUseOfValue(i))
2920 continue;
2921 MCPhysReg Reg = ImpDefs[i - NumDefs];
2922 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2923 return true;
2924 for (MCPhysReg SUReg : SUImpDefs) {
2925 if (TRI->regsOverlap(Reg, SUReg))
2926 return true;
2927 }
2928 }
2929 }
2930 return false;
2931}
2932
2933/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2934/// are not handled well by the general register pressure reduction
2935/// heuristics. When presented with code like this:
2936///
2937/// N
2938/// / |
2939/// / |
2940/// U store
2941/// |
2942/// ...
2943///
2944/// the heuristics tend to push the store up, but since the
2945/// operand of the store has another use (U), this would increase
2946/// the length of that other use (the U->N edge).
2947///
2948/// This function transforms code like the above to route U's
2949/// dependence through the store when possible, like this:
2950///
2951/// N
2952/// ||
2953/// ||
2954/// store
2955/// |
2956/// U
2957/// |
2958/// ...
2959///
2960/// This results in the store being scheduled immediately
2961/// after N, which shortens the U->N live range, reducing
2962/// register pressure.
2963void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2964 // Visit all the nodes in topological order, working top-down.
2965 for (SUnit &SU : *SUnits) {
2966 // For now, only look at nodes with no data successors, such as stores.
2967 // These are especially important, due to the heuristics in
2968 // getNodePriority for nodes with no data successors.
2969 if (SU.NumSuccs != 0)
2970 continue;
2971 // For now, only look at nodes with exactly one data predecessor.
2972 if (SU.NumPreds != 1)
2973 continue;
2974 // Avoid prescheduling copies to virtual registers, which don't behave
2975 // like other nodes from the perspective of scheduling heuristics.
2976 if (SDNode *N = SU.getNode())
2977 if (N->getOpcode() == ISD::CopyToReg &&
2978 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
2979 continue;
2980
2981 SDNode *PredFrameSetup = nullptr;
2982 for (const SDep &Pred : SU.Preds)
2983 if (Pred.isCtrl() && Pred.getSUnit()) {
2984 // Find a predecessor that is not a data dependence.
2985 SDNode *PredND = Pred.getSUnit()->getNode();
2986
2987 // If PredND is FrameSetup, we should not preschedule the node,
2988 // or else, when scheduling bottom-up, ADJCALLSTACKDOWN and
2989 // ADJCALLSTACKUP may hold CallResource too long and prevent other
2990 // calls from being scheduled. If there's no other available node
2991 // to schedule, the scheduler will try to rename the register by
2992 // creating a copy to avoid the conflict, which will fail because
2993 // CallResource is not a real physical register.
2994 if (PredND && PredND->isMachineOpcode() &&
2995 (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
2996 PredFrameSetup = PredND;
2997 break;
2998 }
2999 }
3000 // Skip the node if it has a FrameSetup parent.
3001 if (PredFrameSetup != nullptr)
3002 continue;
3003
3004 // Locate the single data predecessor.
3005 SUnit *PredSU = nullptr;
3006 for (const SDep &Pred : SU.Preds)
3007 if (!Pred.isCtrl()) {
3008 PredSU = Pred.getSUnit();
3009 break;
3010 }
3011 assert(PredSU);
3012
3013 // Don't rewrite edges that carry physregs, because that requires additional
3014 // support infrastructure.
3015 if (PredSU->hasPhysRegDefs)
3016 continue;
3017 // Short-circuit the case where SU is PredSU's only data successor.
3018 if (PredSU->NumSuccs == 1)
3019 continue;
3020 // Avoid prescheduling to copies from virtual registers, which don't behave
3021 // like other nodes from the perspective of scheduling heuristics.
3022 if (SDNode *N = SU.getNode())
3023 if (N->getOpcode() == ISD::CopyFromReg &&
3024 cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual())
3025 continue;
3026
3027 // Perform checks on the successors of PredSU.
3028 for (const SDep &PredSucc : PredSU->Succs) {
3029 SUnit *PredSuccSU = PredSucc.getSUnit();
3030 if (PredSuccSU == &SU) continue;
3031 // If PredSU has another successor with no data successors, for
3032 // now don't attempt to choose either over the other.
3033 if (PredSuccSU->NumSuccs == 0)
3034 goto outer_loop_continue;
3035 // Don't break physical register dependencies.
3036 if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
3037 if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
3038 goto outer_loop_continue;
3039 // Don't introduce graph cycles.
3040 if (scheduleDAG->IsReachable(&SU, PredSuccSU))
3041 goto outer_loop_continue;
3042 }
3043
3044 // Ok, the transformation is safe and the heuristics suggest it is
3045 // profitable. Update the graph.
3046 LLVM_DEBUG(
3047 dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
3048 << PredSU->NodeNum
3049 << " to guide scheduling in the presence of multiple uses\n");
3050 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
3051 SDep Edge = PredSU->Succs[i];
3052 assert(!Edge.isAssignedRegDep());
3053 SUnit *SuccSU = Edge.getSUnit();
3054 if (SuccSU != &SU) {
3055 Edge.setSUnit(PredSU);
3056 scheduleDAG->RemovePred(SuccSU, Edge);
3057 scheduleDAG->AddPredQueued(&SU, Edge);
3058 Edge.setSUnit(&SU);
3059 scheduleDAG->AddPredQueued(SuccSU, Edge);
3060 --i;
3061 }
3062 }
3063 outer_loop_continue:;
3064 }
3065}
3066
3067 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
3068 /// it as a def&use operand, add a pseudo control edge from it to the other
3069/// node (if it won't create a cycle) so the two-address one will be scheduled
3070/// first (lower in the schedule). If both nodes are two-address, favor the
3071/// one that has a CopyToReg use (more likely to be a loop induction update).
3072/// If both are two-address, but one is commutable while the other is not
3073/// commutable, favor the one that's not commutable.
3074void RegReductionPQBase::AddPseudoTwoAddrDeps() {
3075 for (SUnit &SU : *SUnits) {
3076 if (!SU.isTwoAddress)
3077 continue;
3078
3079 SDNode *Node = SU.getNode();
3080 if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
3081 continue;
3082
3083 bool isLiveOut = hasOnlyLiveOutUses(&SU);
3084 unsigned Opc = Node->getMachineOpcode();
3085 const MCInstrDesc &MCID = TII->get(Opc);
3086 unsigned NumRes = MCID.getNumDefs();
3087 unsigned NumOps = MCID.getNumOperands() - NumRes;
3088 for (unsigned j = 0; j != NumOps; ++j) {
3089 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
3090 continue;
3091 SDNode *DU = SU.getNode()->getOperand(j).getNode();
3092 if (DU->getNodeId() == -1)
3093 continue;
3094 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
3095 if (!DUSU)
3096 continue;
3097 for (const SDep &Succ : DUSU->Succs) {
3098 if (Succ.isCtrl())
3099 continue;
3100 SUnit *SuccSU = Succ.getSUnit();
3101 if (SuccSU == &SU)
3102 continue;
3103 // Be conservative. Ignore if nodes aren't at roughly the same
3104 // depth and height.
3105 if (SuccSU->getHeight() < SU.getHeight() &&
3106 (SU.getHeight() - SuccSU->getHeight()) > 1)
3107 continue;
3108 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
3109 // constrains whatever is using the copy, instead of the copy
3110 // itself. In the case that the copy is coalesced, this
3111 // preserves the intent of the pseudo two-address heuristics.
3112 while (SuccSU->Succs.size() == 1 &&
3113 SuccSU->getNode()->isMachineOpcode() &&
3114 SuccSU->getNode()->getMachineOpcode() ==
3115 TargetOpcode::COPY_TO_REGCLASS)
3116 SuccSU = SuccSU->Succs.front().getSUnit();
3117 // Don't constrain non-instruction nodes.
3118 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
3119 continue;
3120 // Don't constrain nodes with physical register defs if the
3121 // predecessor can clobber them.
3122 if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
3123 if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
3124 continue;
3125 }
3126 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
3127 // these may be coalesced away. We want them close to their uses.
3128 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
3129 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
3130 SuccOpc == TargetOpcode::INSERT_SUBREG ||
3131 SuccOpc == TargetOpcode::SUBREG_TO_REG)
3132 continue;
3133 if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
3134 (!canClobber(SuccSU, DUSU) ||
3135 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
3136 (!SU.isCommutable && SuccSU->isCommutable)) &&
3137 !scheduleDAG->IsReachable(SuccSU, &SU)) {
3138 LLVM_DEBUG(dbgs()
3139 << " Adding a pseudo-two-addr edge from SU #"
3140 << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
3141 scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
3142 }
3143 }
3144 }
3145 }
3146}
3147
3148//===----------------------------------------------------------------------===//
3149// Public Constructor Functions
3150//===----------------------------------------------------------------------===//
3151
3152 ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
3153 CodeGenOptLevel OptLevel) {
3154 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3155 const TargetInstrInfo *TII = STI.getInstrInfo();
3156 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3157
3158 BURegReductionPriorityQueue *PQ =
3159 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
3160 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3161 PQ->setScheduleDAG(SD);
3162 return SD;
3163}
3164
3165 ScheduleDAGSDNodes *
3166 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
3167 CodeGenOptLevel OptLevel) {
3168 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3169 const TargetInstrInfo *TII = STI.getInstrInfo();
3170 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3171
3172 SrcRegReductionPriorityQueue *PQ =
3173 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
3174 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
3175 PQ->setScheduleDAG(SD);
3176 return SD;
3177}
3178
3179 ScheduleDAGSDNodes *
3180 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3181 CodeGenOptLevel OptLevel) {
3182 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3183 const TargetInstrInfo *TII = STI.getInstrInfo();
3184 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3185 const TargetLowering *TLI = IS->TLI;
3186
3187 HybridBURRPriorityQueue *PQ =
3188 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3189
3190 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3191 PQ->setScheduleDAG(SD);
3192 return SD;
3193}
3194
3195 ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3196 CodeGenOptLevel OptLevel) {
3197 const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
3198 const TargetInstrInfo *TII = STI.getInstrInfo();
3199 const TargetRegisterInfo *TRI = STI.getRegisterInfo();
3200 const TargetLowering *TLI = IS->TLI;
3201
3202 ILPBURRPriorityQueue *PQ =
3203 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
3204 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3205 PQ->setScheduleDAG(SD);
3206 return SD;
3207}
GetCostForDef - Looks up the register class and cost for a given definition.
static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ)
static cl::opt< bool > DisableSchedRegPressure("disable-sched-reg-pressure", cl::Hidden, cl::init(false), cl::desc("Disable regpressure priority in sched=list-ilp"))
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ)
static void initVRegCycle(SUnit *SU)
static constexpr unsigned RegSequenceCost
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII)
FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate the corresponding (lowered) C...
static bool isOperandOf(const SUnit *SU, SDNode *N)
static cl::opt< bool > DisableSchedVRegCycle("disable-sched-vrcycle", cl::Hidden, cl::init(false), cl::desc("Disable virtual register cycle interference checks"))
static int checkSpecialNodes(const SUnit *left, const SUnit *right)
static cl::opt< bool > DisableSchedLiveUses("disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp"))
static const uint32_t * getNodeRegMask(const SDNode *N)
getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static cl::opt< unsigned > AvgIPC("sched-avg-ipc", cl::Hidden, cl::init(1), cl::desc("Average inst/cycle whan no target itinerary exists."))
static bool hasVRegCycleUse(const SUnit *SU)
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler)
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, ArrayRef< SUnit * > LiveRegDefs, SmallSet< unsigned, 4 > &RegAdded, SmallVectorImpl< unsigned > &LRegs)
CheckForLiveRegDefMasked - Check for any live physregs that are clobbered by RegMask,...
This file defines the SmallSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file describes how to lower LLVM code to machine code.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
This class represents an Operation in the Expression.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:265
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
ArrayRef< MCPhysReg > implicit_defs() const
Return a list of registers that are potentially written by any instance of this machine instruction.
Definition: MCInstrDesc.h:579
bool isCommutable() const
Return true if this may be a 2- or 3-address instruction (of the form "X = op Y, Z,...
Definition: MCInstrDesc.h:481
MCRegAliasIterator enumerates all registers aliasing Reg.
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
int getNodeId() const
Return the unique node id.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getIROrder() const
Return the node ordering.
void setNodeId(int Id)
Set unique node id.
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
SDNode * getGluedNode() const
If this node has a glue operand, return the node to which the glue operand points.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
Definition: ScheduleDAG.h:211
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Definition: ScheduleDAG.h:161
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
void setSUnit(SUnit *SU)
Definition: ScheduleDAG.h:483
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
void setHeightToAtLeast(unsigned NewHeight)
If NewHeight is greater than this node's height value, set it to be the new height value.
unsigned NumSuccs
Definition: ScheduleDAG.h:267
unsigned NumPreds
Definition: ScheduleDAG.h:266
unsigned NodeQueueId
Queue id of node.
Definition: ScheduleDAG.h:265
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
unsigned NumSuccsLeft
Definition: ScheduleDAG.h:269
bool hasPhysRegClobbers
Has any physreg defs, used or not.
Definition: ScheduleDAG.h:281
bool isCallOp
Is a function call operand.
Definition: ScheduleDAG.h:276
const TargetRegisterClass * CopyDstRC
Is a special copy node if != nullptr.
Definition: ScheduleDAG.h:302
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:406
void setHeightDirty()
Sets a flag in this node to indicate that its stored Height value will require recomputation the next...
bool isSucc(const SUnit *N) const
Tests if node N is a successor of this node.
Definition: ScheduleDAG.h:439
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
unsigned short NumRegDefsLeft
Definition: ScheduleDAG.h:272
bool isPending
True once pending.
Definition: ScheduleDAG.h:282
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:398
bool isScheduled
True once scheduled.
Definition: ScheduleDAG.h:284
bool isAvailable
True once available.
Definition: ScheduleDAG.h:283
bool isScheduleLow
True if preferable to schedule low.
Definition: ScheduleDAG.h:286
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Sched::Preference SchedulingPref
Scheduling preference.
Definition: ScheduleDAG.h:290
const TargetRegisterClass * CopySrcRC
Definition: ScheduleDAG.h:304
SDNode * getNode() const
Returns the representative SDNode for this SUnit.
Definition: ScheduleDAG.h:355
bool isTwoAddress
Is a two-address instruction.
Definition: ScheduleDAG.h:277
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
bool isVRegCycle
May use and def the same vreg.
Definition: ScheduleDAG.h:274
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
RegDefIter - In place iteration over the values defined by an SUnit.
ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
SUnit * newSUnit(SDNode *N)
NewSUnit - Creates a new SUnit and return a ptr to it.
virtual void Schedule()=0
Schedule - Order nodes according to selected style, filling in the Sequence member.
virtual bool forceUnitLatencies() const
ForceUnitLatencies - Return true if all scheduling edges should be given a latency value of one.
SUnit * Clone(SUnit *Old)
Clone - Creates a clone of the specified SUnit.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:702
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an edge to be removed from the specified node N from ...
bool WillCreateCycle(SUnit *TargetSU, SUnit *SU)
Returns true if addPred(TargetSU, SU) creates a cycle.
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:776
void AddSUnitWithoutPredecessors(const SUnit *SU)
Add a SUnit without predecessors to the end of the topological order.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
virtual void dumpNode(const SUnit &SU) const =0
HazardRecognizer - This determines whether or not an instruction can be issued this cycle,...
virtual void RecedeCycle()
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
virtual void Reset()
Reset - This callback is invoked when a new block of instructions is about to be schedule.
virtual void EmitInstruction(SUnit *)
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
virtual bool atIssueLimit() const
atIssueLimit - Return true if no more instructions may be issued in this cycle.
virtual HazardType getHazardType(SUnit *, int Stalls=0)
getHazardType - Return the hazard type of emitting this node.
This interface is used to plug different priorities computation algorithms into the list scheduler.
Definition: ScheduleDAG.h:496
void setCurCycle(unsigned Cycle)
Definition: ScheduleDAG.h:545
virtual void remove(SUnit *SU)=0
virtual void releaseState()=0
virtual SUnit * pop()=0
virtual void scheduledNode(SUnit *)
As each node is scheduled, this method is invoked.
Definition: ScheduleDAG.h:541
virtual bool isReady(SUnit *) const
Definition: ScheduleDAG.h:520
virtual bool tracksRegPressure() const
Definition: ScheduleDAG.h:518
virtual void dump(ScheduleDAG *) const
Definition: ScheduleDAG.h:536
virtual void initNodes(std::vector< SUnit > &SUnits)=0
virtual bool empty() const =0
virtual void unscheduledNode(SUnit *)
Definition: ScheduleDAG.h:543
virtual void addNode(const SUnit *SU)=0
virtual void updateNode(const SUnit *SU)=0
virtual void push(SUnit *U)=0
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
const TargetLowering * TLI
MachineFunction * MF
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const
Allocate and return a hazard recognizer to use for this target when scheduling the machine instructio...
virtual uint8_t getRepRegClassCostFor(MVT VT) const
Return the cost of the 'representative' register class for the specified value type.
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const
Return the register pressure "high water mark" for the specific register class.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ EH_LABEL
EH_LABEL - Represents a label in mid basic block used to track locations needed for debug and excepti...
Definition: ISDOpcodes.h:1097
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:47
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ LIFETIME_START
This corresponds to the llvm.lifetime.
Definition: ISDOpcodes.h:1293
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1092
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1089
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
constexpr double e
Definition: MathExtras.h:31
Sequence
A sequence of states that a pointer may go through in which an objc_retain and objc_release are actua...
Definition: PtrState.h:41
NodeAddr< DefNode * > Def
Definition: RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1747
ScheduleDAGSDNodes * createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up register usage reduction list scheduler.
ScheduleDAGSDNodes * createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createHybridListDAGScheduler - This creates a bottom up register pressure aware list scheduler that m...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
ScheduleDAGSDNodes * createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel OptLevel)
createBURRListDAGScheduler - This creates a bottom up list scheduler that schedules nodes in source c...
ScheduleDAGSDNodes * createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOptLevel)
createILPListDAGScheduler - This creates a bottom up register pressure aware list scheduler that trie...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1884
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Description of the encoding of one expression Op.