LLVM 23.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/SetVector.h"
54
55#include <deque>
56
57namespace llvm {
58
59class AAResults;
60class NodeSet;
61class SMSchedule;
62
65
66/// The main class in the implementation of the target independent
67/// software pipeliner pass.
69public:
70 MachineFunction *MF = nullptr;
72 const MachineLoopInfo *MLI = nullptr;
73 const MachineDominatorTree *MDT = nullptr;
75 const TargetInstrInfo *TII = nullptr;
77 bool disabledByPragma = false;
78 unsigned II_setByPragma = 0;
79
80#ifndef NDEBUG
81 static int NumTries;
82#endif
83
84 /// Cache the target analysis information about the loop.
85 struct LoopInfo {
91 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
92 nullptr;
93 };
95
96 static char ID;
97
99
101
102 void getAnalysisUsage(AnalysisUsage &AU) const override;
103
104private:
105 void preprocessPhiNodes(MachineBasicBlock &B);
106 bool canPipelineLoop(MachineLoop &L);
107 bool scheduleLoop(MachineLoop &L);
108 bool swingModuloScheduler(MachineLoop &L);
109 void setPragmaPipelineOptions(MachineLoop &L);
110 bool runWindowScheduler(MachineLoop &L);
111 bool useSwingModuloScheduler();
112 bool useWindowScheduler(bool Changed);
113};
114
115/// Represents a dependence between two instruction.
117 SUnit *Dst = nullptr;
118 SDep Pred;
119 unsigned Distance = 0;
120 bool IsValidationOnly = false;
121
122public:
123 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
124 /// \p Dep in the original DAG. This pair has no information about the
125 /// direction of the edge, so we need to pass an additional argument \p
126 /// IsSucc.
127 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
128 bool IsValidationOnly)
129 : Dst(PredOrSucc), Pred(Dep), Distance(0u),
130 IsValidationOnly(IsValidationOnly) {
131 SUnit *Src = Dep.getSUnit();
132
133 if (IsSucc) {
134 std::swap(Src, Dst);
135 Pred.setSUnit(Src);
136 }
137
138 // An anti-dependence to PHI means loop-carried dependence.
139 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
140 Distance = 1;
141 std::swap(Src, Dst);
142 auto Reg = Pred.getReg();
143 Pred = SDep(Src, SDep::Kind::Data, Reg);
144 }
145 }
146
147 /// Returns the SUnit from which the edge comes (source node).
148 SUnit *getSrc() const { return Pred.getSUnit(); }
149
150 /// Returns the SUnit to which the edge points (destination node).
151 SUnit *getDst() const { return Dst; }
152
153 /// Returns the latency value for the edge.
154 unsigned getLatency() const { return Pred.getLatency(); }
155
156 /// Sets the latency for the edge.
157 void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
158
159 /// Returns the distance value for the edge.
160 unsigned getDistance() const { return Distance; }
161
162 /// Sets the distance value for the edge.
163 void setDistance(unsigned D) { Distance = D; }
164
165 /// Returns the register associated with the edge.
166 Register getReg() const { return Pred.getReg(); }
167
168 /// Returns true if the edge represents anti dependence.
169 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
170
171 /// Returns true if the edge represents output dependence.
172 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
173
174 /// Returns true if the edge represents a dependence that is not data, anti or
175 /// output dependence.
176 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
177
178 /// Returns true if the edge represents unknown scheduling barrier.
179 bool isBarrier() const { return Pred.isBarrier(); }
180
181 /// Returns true if the edge represents an artificial dependence.
182 bool isArtificial() const { return Pred.isArtificial(); }
183
184 /// Tests if this is a Data dependence that is associated with a register.
185 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
186
187 /// Returns true for DDG nodes that we ignore when computing the cost
188 /// functions. We ignore the back-edge recurrence in order to avoid unbounded
189 /// recursion in the calculation of the ASAP, ALAP, etc functions.
190 bool ignoreDependence(bool IgnoreAnti) const;
191
192 /// Returns true if this edge is intended to be used only for validating the
193 /// schedule.
194 bool isValidationOnly() const { return IsValidationOnly; }
195};
196
197/// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
198/// assume cycle dependencies as the name suggests, such dependencies must be
199/// handled separately. After DAG construction is finished, these dependencies
200/// are added to SwingSchedulerDDG.
201/// TODO: Also handle output-dependencies introduced by physical registers.
205
207
209 auto Ite = OrderDeps.find(Key);
210 if (Ite == OrderDeps.end())
211 return nullptr;
212 return &Ite->second;
213 }
214
215 /// Adds some edges to the original DAG that correspond to loop-carried
216 /// dependencies. Historically, loop-carried edges are represented by using
217 /// non-loop-carried edges in the original DAG. This function appends such
218 /// edges to preserve the previous behavior.
219 void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
220
221 void dump(SUnit *SU, const TargetRegisterInfo *TRI,
222 const MachineRegisterInfo *MRI) const;
223};
224
225/// This class provides APIs to retrieve edges from/to an SUnit node, with a
226/// particular focus on loop-carried dependencies. Since SUnit is not designed
227/// to represent such edges, handling them directly using its APIs has required
228/// non-trivial logic in the past. This class serves as a wrapper around SUnit,
229/// offering a simpler interface for managing these dependencies.
232
233 struct SwingSchedulerDDGEdges {
234 EdgesType Preds;
235 EdgesType Succs;
236 };
237
238 void initEdges(SUnit *SU);
239
240 SUnit *EntrySU;
241 SUnit *ExitSU;
242
243 std::vector<SwingSchedulerDDGEdges> EdgesVec;
244 SwingSchedulerDDGEdges EntrySUEdges;
245 SwingSchedulerDDGEdges ExitSUEdges;
246
247 /// Edges that are used only when validating the schedule. These edges are
248 /// not considered to drive the optimization heuristics.
249 SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
250
251 /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
252 /// the ctor.
253 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
254
255 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
256 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
257
258public:
259 SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
260 const LoopCarriedEdges &LCE);
261
262 const EdgesType &getInEdges(const SUnit *SU) const;
263
264 const EdgesType &getOutEdges(const SUnit *SU) const;
265
266 bool isValidSchedule(const SMSchedule &Schedule) const;
267};
268
269/// This class builds the dependence graph for the instructions in a loop,
270/// and attempts to schedule the instructions using the SMS algorithm.
272 MachinePipeliner &Pass;
273
274 std::unique_ptr<SwingSchedulerDDG> DDG;
275
276 /// The minimum initiation interval between iterations for this schedule.
277 unsigned MII = 0;
278 /// The maximum initiation interval between iterations for this schedule.
279 unsigned MAX_II = 0;
280 /// Set to true if a valid pipelined schedule is found for the loop.
281 bool Scheduled = false;
282 MachineLoop &Loop;
283 LiveIntervals &LIS;
284 const RegisterClassInfo &RegClassInfo;
285 unsigned II_setByPragma = 0;
286 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
287
288 /// A topological ordering of the SUnits, which is needed for changing
289 /// dependences and iterating over the SUnits.
291
292 struct NodeInfo {
293 int ASAP = 0;
294 int ALAP = 0;
295 int ZeroLatencyDepth = 0;
296 int ZeroLatencyHeight = 0;
297
298 NodeInfo() = default;
299 };
300 /// Computed properties for each node in the graph.
301 std::vector<NodeInfo> ScheduleInfo;
302
303 enum OrderKind { BottomUp = 0, TopDown = 1 };
304 /// Computed node ordering for scheduling.
305 SetVector<SUnit *> NodeOrder;
306
307 using NodeSetType = SmallVector<NodeSet, 8>;
308 using ValueMapTy = DenseMap<unsigned, unsigned>;
309 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
311
312 /// Instructions to change when emitting the final schedule.
314
315 /// We may create a new instruction, so remember it because it
316 /// must be deleted when the pass is finished.
318
319 /// Ordered list of DAG postprocessing steps.
320 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
321
322 /// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
323 AliasAnalysis *AA;
324
325 /// Used to compute loop-carried dependencies (i.e.,
326 /// addLoopCarriedDependences).
327 BatchAAResults BAA;
328
329 /// Helper class to implement Johnson's circuit finding algorithm.
330 class Circuits {
331 std::vector<SUnit> &SUnits;
332 SetVector<SUnit *> Stack;
333 BitVector Blocked;
336 // Node to Index from ScheduleDAGTopologicalSort
337 std::vector<int> *Node2Idx;
338 unsigned NumPaths = 0u;
339 static unsigned MaxPaths;
340
341 public:
342 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
343 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
344 Node2Idx = new std::vector<int>(SUs.size());
345 unsigned Idx = 0;
346 for (const auto &NodeNum : Topo)
347 Node2Idx->at(NodeNum) = Idx++;
348 }
349 Circuits &operator=(const Circuits &other) = delete;
350 Circuits(const Circuits &other) = delete;
351 ~Circuits() { delete Node2Idx; }
352
353 /// Reset the data structures used in the circuit algorithm.
354 void reset() {
355 Stack.clear();
356 Blocked.reset();
357 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
358 NumPaths = 0;
359 }
360
361 void createAdjacencyStructure(SwingSchedulerDAG *DAG);
362 bool circuit(int V, int S, NodeSetType &NodeSets,
363 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
364 void unblock(int U);
365 };
366
367 struct CopyToPhiMutation : public ScheduleDAGMutation {
368 void apply(ScheduleDAGInstrs *DAG) override;
369 };
370
371public:
373 const RegisterClassInfo &rci, unsigned II,
375 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
376 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
377 Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
378 P.MF->getSubtarget().getSMSMutations(Mutations);
380 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
381 BAA.enableCrossIterationMode();
382 }
383
384 void schedule() override;
385 void finishBlock() override;
386
387 /// Return true if the loop kernel has been scheduled.
388 bool hasNewSchedule() { return Scheduled; }
389
390 /// Return the earliest time an instruction may be scheduled.
391 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
392
393 /// Return the latest time an instruction my be scheduled.
394 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
395
396 /// The mobility function, which the number of slots in which
397 /// an instruction may be scheduled.
398 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
399
400 /// The depth, in the dependence graph, for a node.
401 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
402
403 /// The maximum unweighted length of a path from an arbitrary node to the
404 /// given node in which each edge has latency 0
406 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
407 }
408
409 /// The height, in the dependence graph, for a node.
410 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
411
412 /// The maximum unweighted length of a path from the given node to an
413 /// arbitrary node in which each edge has latency 0
415 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
416 }
417
418 bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
419
420 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
421
422 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
423
424 /// Return the new base register that was stored away for the changed
425 /// instruction.
428 InstrChanges.find(SU);
429 if (It != InstrChanges.end())
430 return It->second.first;
431 return Register();
432 }
433
434 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
435 Mutations.push_back(std::move(Mutation));
436 }
437
438 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
439
440 const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
441
442 bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
443 const MachineInstr *OtherMI) const;
444
445private:
446 LoopCarriedEdges addLoopCarriedDependences();
447 void updatePhiDependences();
448 void changeDependences();
449 unsigned calculateResMII();
450 unsigned calculateRecMII(NodeSetType &RecNodeSets);
451 void findCircuits(NodeSetType &NodeSets);
452 void fuseRecs(NodeSetType &NodeSets);
453 void removeDuplicateNodes(NodeSetType &NodeSets);
454 void computeNodeFunctions(NodeSetType &NodeSets);
455 void registerPressureFilter(NodeSetType &NodeSets);
456 void colocateNodeSets(NodeSetType &NodeSets);
457 void checkNodeSets(NodeSetType &NodeSets);
458 void groupRemainingNodes(NodeSetType &NodeSets);
459 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
460 SetVector<SUnit *> &NodesAdded);
461 void computeNodeOrder(NodeSetType &NodeSets);
462 void checkValidNodeOrder(const NodeSetType &Circuits) const;
463 bool schedulePipeline(SMSchedule &Schedule);
464 bool computeDelta(const MachineInstr &MI, int &Delta) const;
465 MachineInstr *findDefInLoop(Register Reg);
466 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
467 unsigned &OffsetPos, Register &NewBase,
468 int64_t &NewOffset);
469 void postProcessDAG();
470 /// Set the Minimum Initiation Interval for this schedule attempt.
471 void setMII(unsigned ResMII, unsigned RecMII);
472 /// Set the Maximum Initiation Interval for this schedule attempt.
473 void setMAX_II();
474};
475
476/// A NodeSet contains a set of SUnit DAG nodes with additional information
477/// that assigns a priority to the set.
478class NodeSet {
479 SetVector<SUnit *> Nodes;
480 bool HasRecurrence = false;
481 unsigned RecMII = 0;
482 int MaxMOV = 0;
483 unsigned MaxDepth = 0;
484 unsigned Colocate = 0;
485 SUnit *ExceedPressure = nullptr;
486 unsigned Latency = 0;
487
488public:
490
491 NodeSet() = default;
493 : Nodes(S, E), HasRecurrence(true) {
494 // Calculate the latency of this node set.
495 // Example to demonstrate the calculation:
496 // Given: N0 -> N1 -> N2 -> N0
497 // Edges:
498 // (N0 -> N1, 3)
499 // (N0 -> N1, 5)
500 // (N1 -> N2, 2)
501 // (N2 -> N0, 1)
502 // The total latency which is a lower bound of the recurrence MII is the
503 // longest path from N0 back to N0 given only the edges of this node set.
504 // In this example, the latency is: 5 + 2 + 1 = 8.
505 //
506 // Hold a map from each SUnit in the circle to the maximum distance from the
507 // source node by only considering the nodes.
508 const SwingSchedulerDDG *DDG = DAG->getDDG();
509 DenseMap<SUnit *, unsigned> SUnitToDistance;
510 for (auto *Node : Nodes)
511 SUnitToDistance[Node] = 0;
512
513 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
514 SUnit *U = Nodes[I - 1];
515 SUnit *V = Nodes[I % Nodes.size()];
516 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
517 SUnit *SuccSUnit = Succ.getDst();
518 if (V != SuccSUnit)
519 continue;
520 unsigned &DU = SUnitToDistance[U];
521 unsigned &DV = SUnitToDistance[V];
522 if (DU + Succ.getLatency() > DV)
523 DV = DU + Succ.getLatency();
524 }
525 }
526 // Handle a back-edge in loop carried dependencies
527 SUnit *FirstNode = Nodes[0];
528 SUnit *LastNode = Nodes[Nodes.size() - 1];
529
530 for (auto &PI : DDG->getInEdges(LastNode)) {
531 // If we have an order dep that is potentially loop carried then a
532 // back-edge exists between the last node and the first node that isn't
533 // modeled in the DAG. Handle it manually by adding 1 to the distance of
534 // the last node.
535 if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
536 !DAG->isLoopCarriedDep(PI))
537 continue;
538 unsigned &First = SUnitToDistance[FirstNode];
539 unsigned Last = SUnitToDistance[LastNode];
540 First = std::max(First, Last + 1);
541 }
542
543 // The latency is the distance from the source node to itself.
544 Latency = SUnitToDistance[Nodes.front()];
545 }
546
547 bool insert(SUnit *SU) { return Nodes.insert(SU); }
548
549 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
550
551 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
552 return Nodes.remove_if(P);
553 }
554
555 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
556
557 bool hasRecurrence() { return HasRecurrence; };
558
559 unsigned size() const { return Nodes.size(); }
560
561 bool empty() const { return Nodes.empty(); }
562
563 SUnit *getNode(unsigned i) const { return Nodes[i]; };
564
565 void setRecMII(unsigned mii) { RecMII = mii; };
566
567 void setColocate(unsigned c) { Colocate = c; };
568
569 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
570
571 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
572
573 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
574
575 int getRecMII() { return RecMII; }
576
577 /// Summarize node functions for the entire node set.
579 for (SUnit *SU : *this) {
580 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
581 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
582 }
583 }
584
585 unsigned getLatency() { return Latency; }
586
587 unsigned getMaxDepth() { return MaxDepth; }
588
589 void clear() {
590 Nodes.clear();
591 RecMII = 0;
592 HasRecurrence = false;
593 MaxMOV = 0;
594 MaxDepth = 0;
595 Colocate = 0;
596 ExceedPressure = nullptr;
597 }
598
599 operator SetVector<SUnit *> &() { return Nodes; }
600
601 /// Sort the node sets by importance. First, rank them by recurrence MII,
602 /// then by mobility (least mobile done first), and finally by depth.
603 /// Each node set may contain a colocate value which is used as the first
604 /// tie breaker, if it's set.
605 bool operator>(const NodeSet &RHS) const {
606 if (RecMII == RHS.RecMII) {
607 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
608 return Colocate < RHS.Colocate;
609 if (MaxMOV == RHS.MaxMOV)
610 return MaxDepth > RHS.MaxDepth;
611 return MaxMOV < RHS.MaxMOV;
612 }
613 return RecMII > RHS.RecMII;
614 }
615
616 bool operator==(const NodeSet &RHS) const {
617 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
618 MaxDepth == RHS.MaxDepth;
619 }
620
621 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
622
623 iterator begin() { return Nodes.begin(); }
624 iterator end() { return Nodes.end(); }
625 void print(raw_ostream &os) const;
626
627#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
628 LLVM_DUMP_METHOD void dump() const;
629#endif
630};
631
632// 16 was selected based on the number of ProcResource kinds for all
633// existing Subtargets, so that SmallVector don't need to resize too often.
634static const int DefaultProcResSize = 16;
635
637private:
638 const MCSubtargetInfo *STI;
639 const MCSchedModel &SM;
640 const TargetSubtargetInfo *ST;
641 const TargetInstrInfo *TII;
643 const bool UseDFA;
644 /// DFA resources for each slot
646 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
647 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
649 /// The number of scheduled micro operations for each slot. Micro operations
650 /// are assumed to be scheduled one per cycle, starting with the cycle in
651 /// which the instruction is scheduled.
652 llvm::SmallVector<int> NumScheduledMops;
653 /// Each processor resource is associated with a so-called processor resource
654 /// mask. This vector allows to correlate processor resource IDs with
655 /// processor resource masks. There is exactly one element per each processor
656 /// resource declared by the scheduling model.
658 int InitiationInterval = 0;
659 /// The number of micro operations that can be scheduled at a cycle.
660 int IssueWidth;
661
662 int calculateResMIIDFA() const;
663 /// Check if MRT is overbooked
664 bool isOverbooked() const;
665 /// Reserve resources on MRT
666 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
667 /// Unreserve resources on MRT
668 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
669
670 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
671 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
672 /// II).
673 int positiveModulo(int Dividend, int Divisor) const {
674 assert(Divisor > 0);
675 int R = Dividend % Divisor;
676 if (R < 0)
677 R += Divisor;
678 return R;
679 }
680
681#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
682 LLVM_DUMP_METHOD void dumpMRT() const;
683#endif
684
685public:
687 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
688 DAG(DAG), UseDFA(ST->useDFAforSMS()),
689 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
690 IssueWidth(SM.IssueWidth) {
691 initProcResourceVectors(SM, ProcResourceMasks);
692 if (IssueWidth <= 0)
693 // If IssueWidth is not specified, set a sufficiently large value
694 IssueWidth = 100;
695 if (SwpForceIssueWidth > 0)
696 IssueWidth = SwpForceIssueWidth;
697 }
698
699 void initProcResourceVectors(const MCSchedModel &SM,
701
702 /// Check if the resources occupied by a machine instruction are available
703 /// in the current state.
704 bool canReserveResources(SUnit &SU, int Cycle);
705
706 /// Reserve the resources occupied by a machine instruction and change the
707 /// current state to reflect that change.
708 void reserveResources(SUnit &SU, int Cycle);
709
710 int calculateResMII() const;
711
712 /// Initialize resources with the initiation interval II.
713 void init(int II);
714};
715
716/// This class represents the scheduled code. The main data structure is a
717/// map from scheduled cycle to instructions. During scheduling, the
718/// data structure explicitly represents all stages/iterations. When
719/// the algorithm finshes, the schedule is collapsed into a single stage,
720/// which represents instructions from different loop iterations.
721///
722/// The SMS algorithm allows negative values for cycles, so the first cycle
723/// in the schedule is the smallest cycle value.
725private:
726 /// Map from execution cycle to instructions.
727 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
728
729 /// Map from instruction to execution cycle.
730 std::map<SUnit *, int> InstrToCycle;
731
732 /// Keep track of the first cycle value in the schedule. It starts
733 /// as zero, but the algorithm allows negative values.
734 int FirstCycle = 0;
735
736 /// Keep track of the last cycle value in the schedule.
737 int LastCycle = 0;
738
739 /// The initiation interval (II) for the schedule.
740 int InitiationInterval = 0;
741
742 /// Target machine information.
743 const TargetSubtargetInfo &ST;
744
745 /// Virtual register information.
747
748 ResourceManager ProcItinResources;
749
750public:
752 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
753 ProcItinResources(&ST, DAG) {}
754
755 void reset() {
756 ScheduledInstrs.clear();
757 InstrToCycle.clear();
758 FirstCycle = 0;
759 LastCycle = 0;
760 InitiationInterval = 0;
761 }
762
763 /// Set the initiation interval for this schedule.
765 InitiationInterval = ii;
766 ProcItinResources.init(ii);
767 }
768
769 /// Return the initiation interval for this schedule.
770 int getInitiationInterval() const { return InitiationInterval; }
771
772 /// Return the first cycle in the completed schedule. This
773 /// can be a negative value.
774 int getFirstCycle() const { return FirstCycle; }
775
776 /// Return the last cycle in the finalized schedule.
777 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
778
779 /// Return the cycle of the earliest scheduled instruction in the dependence
780 /// chain.
781 int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep,
782 const SwingSchedulerDDG *DDG);
783
784 /// Return the cycle of the latest scheduled instruction in the dependence
785 /// chain.
786 int latestCycleInChain(const SwingSchedulerDDGEdge &Dep,
787 const SwingSchedulerDDG *DDG);
788
789 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
790 SwingSchedulerDAG *DAG);
791 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
792
793 /// Iterators for the cycle to instruction map.
797
798 /// Return true if the instruction is scheduled at the specified stage.
799 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
800 return (stageScheduled(SU) == (int)StageNum);
801 }
802
803 /// Return the stage for a scheduled instruction. Return -1 if
804 /// the instruction has not been scheduled.
805 int stageScheduled(SUnit *SU) const {
806 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
807 if (it == InstrToCycle.end())
808 return -1;
809 return (it->second - FirstCycle) / InitiationInterval;
810 }
811
812 /// Return the cycle for a scheduled instruction. This function normalizes
813 /// the first cycle to be 0.
814 unsigned cycleScheduled(SUnit *SU) const {
815 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
816 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
817 return (it->second - FirstCycle) % InitiationInterval;
818 }
819
820 /// Return the maximum stage count needed for this schedule.
821 unsigned getMaxStageCount() {
822 return (LastCycle - FirstCycle) / InitiationInterval;
823 }
824
825 /// Return the instructions that are scheduled at the specified cycle.
826 std::deque<SUnit *> &getInstructions(int cycle) {
827 return ScheduledInstrs[cycle];
828 }
829
831 computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
833
834 std::deque<SUnit *>
835 reorderInstructions(const SwingSchedulerDAG *SSD,
836 const std::deque<SUnit *> &Instrs) const;
837
838 bool
839 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
841 bool isValidSchedule(SwingSchedulerDAG *SSD);
842 void finalizeSchedule(SwingSchedulerDAG *SSD);
843 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
844 std::deque<SUnit *> &Insts) const;
845 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
846 bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def,
847 MachineOperand &MO) const;
848
849 bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
850 const SwingSchedulerDDG *DDG) const;
851 void print(raw_ostream &os) const;
852 void dump() const;
853};
854
855} // end namespace llvm
856
857#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Represent the analysis usage information of a pass.
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
BitVector & reset()
Definition BitVector.h:411
iterator end()
Definition DenseMap.h:81
Itinerary data supplied by a subtarget to be used by a target.
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
bool runOnMachineFunction(MachineFunction &MF) override
The "main" function for implementing Swing Modulo Scheduling.
const TargetInstrInfo * TII
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
@ Output
A register output-dependence (aka WAW).
Definition ScheduleDAG.h:57
@ Order
Any other ordering dependency.
Definition ScheduleDAG.h:58
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
DenseMap< int, std::deque< SUnit * > >::iterator sched_iterator
Iterators for the cycle to instruction map.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
A ScheduleDAG for scheduling lists of MachineInstr.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
ScheduleDAG & operator=(const ScheduleDAG &)=delete
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
typename vector_type::const_iterator const_iterator
Definition SetVector.h:73
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
const SwingSchedulerDDG * getDDG() const
void finishBlock() override
Clean up after the software pipeliner runs.
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const
Return true for an order or output dependence that is loop carried potentially.
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
bool mayOverlapInLaterIter(const MachineInstr *BaseMI, const MachineInstr *OtherMI) const
Return false if there is no overlap between the region accessed by BaseMI in an iteration and the reg...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
Register getReg() const
Returns the register associated with the edge.
void setDistance(unsigned D)
Sets the distance value for the edge.
bool isBarrier() const
Returns true if the edge represents unknown scheduling barrier.
void setLatency(unsigned Latency)
Sets the latency for the edge.
SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc, bool IsValidationOnly)
Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.
bool isAntiDep() const
Returns true if the edge represents anti dependence.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
unsigned getLatency() const
Returns the latency value for the edge.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isValidationOnly() const
Returns true if this edge is intended to be used only for validating the schedule.
unsigned getDistance() const
Returns the distance value for the edge.
bool isOutputDep() const
Returns true if the edge represents output dependence.
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
const EdgesType & getInEdges(const SUnit *SU) const
bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
cl::opt< bool > SwpEnableCopyToPhi
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
static const int DefaultProcResSize
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
DenseMap< SUnit *, OrderDep > OrderDepsType
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:123
Machine model for scheduling, bundling, and heuristics.
Definition MCSchedule.h:258
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo