LLVM 23.0.0git
MachinePipeliner.h
Go to the documentation of this file.
1//===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10//
11// Software pipelining (SWP) is an instruction scheduling technique for loops
12// that overlap loop iterations and exploits ILP via a compiler transformation.
13//
14// Swing Modulo Scheduling is an implementation of software pipelining
15// that generates schedules that are near optimal in terms of initiation
16// interval, register requirements, and stage count. See the papers:
17//
18// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
19// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
20// Conference on Parallel Architectures and Compilation Techiniques.
21//
22// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
23// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
24// Transactions on Computers, Vol. 50, No. 3, 2001.
25//
26// "An Implementation of Swing Modulo Scheduling With Extensions for
27// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
28// Urbana-Champaign, 2005.
29//
30//
31// The SMS algorithm consists of three main steps after computing the minimal
32// initiation interval (MII).
33// 1) Analyze the dependence graph and compute information about each
34// instruction in the graph.
35// 2) Order the nodes (instructions) by priority based upon the heuristics
36// described in the algorithm.
37// 3) Attempt to schedule the nodes in the specified order using the MII.
38//
39//===----------------------------------------------------------------------===//
40#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
41#define LLVM_CODEGEN_MACHINEPIPELINER_H
42
43#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/SetVector.h"
54
55#include <deque>
56
57namespace llvm {
58
59class AAResults;
60class NodeSet;
61class SMSchedule;
62
65
66/// The main class in the implementation of the target independent
67/// software pipeliner pass.
69public:
70 MachineFunction *MF = nullptr;
72 const MachineLoopInfo *MLI = nullptr;
73 const MachineDominatorTree *MDT = nullptr;
75 const TargetInstrInfo *TII = nullptr;
77 bool disabledByPragma = false;
78 unsigned II_setByPragma = 0;
79
80#ifndef NDEBUG
81 static int NumTries;
82#endif
83
84 /// Cache the target analysis information about the loop.
85 struct LoopInfo {
91 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
92 nullptr;
93 };
95
96 static char ID;
97
99
100 bool runOnMachineFunction(MachineFunction &MF) override;
101
102 void getAnalysisUsage(AnalysisUsage &AU) const override;
103
104private:
105 void preprocessPhiNodes(MachineBasicBlock &B);
106 bool canPipelineLoop(MachineLoop &L);
107 bool scheduleLoop(MachineLoop &L);
108 bool swingModuloScheduler(MachineLoop &L);
109 void setPragmaPipelineOptions(MachineLoop &L);
110 bool runWindowScheduler(MachineLoop &L);
111 bool useSwingModuloScheduler();
112 bool useWindowScheduler(bool Changed);
113};
114
115/// Represents a dependence between two instruction.
117 SUnit *Dst = nullptr;
118 SDep Pred;
119 unsigned Distance = 0;
120 bool IsValidationOnly = false;
121
122public:
123 /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
124 /// \p Dep in the original DAG. This pair has no information about the
125 /// direction of the edge, so we need to pass an additional argument \p
126 /// IsSucc.
127 SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
128 bool IsValidationOnly)
129 : Dst(PredOrSucc), Pred(Dep), Distance(0u),
130 IsValidationOnly(IsValidationOnly) {
131 SUnit *Src = Dep.getSUnit();
132
133 if (IsSucc) {
134 std::swap(Src, Dst);
135 Pred.setSUnit(Src);
136 }
137
138 // An anti-dependence to PHI means loop-carried dependence.
139 if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
140 Distance = 1;
141 std::swap(Src, Dst);
142 auto Reg = Pred.getReg();
143 Pred = SDep(Src, SDep::Kind::Data, Reg);
144 }
145 }
146
147 /// Returns the SUnit from which the edge comes (source node).
148 SUnit *getSrc() const { return Pred.getSUnit(); }
149
150 /// Returns the SUnit to which the edge points (destination node).
151 SUnit *getDst() const { return Dst; }
152
153 /// Returns the latency value for the edge.
154 unsigned getLatency() const { return Pred.getLatency(); }
155
156 /// Sets the latency for the edge.
157 void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
158
159 /// Returns the distance value for the edge.
160 unsigned getDistance() const { return Distance; }
161
162 /// Sets the distance value for the edge.
163 void setDistance(unsigned D) { Distance = D; }
164
165 /// Returns the register associated with the edge.
166 Register getReg() const { return Pred.getReg(); }
167
168 /// Returns true if the edge represents anti dependence.
169 bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
170
171 /// Returns true if the edge represents output dependence.
172 bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
173
174 /// Returns true if the edge represents a dependence that is not data, anti or
175 /// output dependence.
176 bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
177
178 /// Returns true if the edge represents unknown scheduling barrier.
179 bool isBarrier() const { return Pred.isBarrier(); }
180
181 /// Returns true if the edge represents an artificial dependence.
182 bool isArtificial() const { return Pred.isArtificial(); }
183
184 /// Tests if this is a Data dependence that is associated with a register.
185 bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
186
187 /// Returns true for DDG nodes that we ignore when computing the cost
188 /// functions. We ignore the back-edge recurrence in order to avoid unbounded
189 /// recursion in the calculation of the ASAP, ALAP, etc functions.
190 LLVM_ABI bool ignoreDependence(bool IgnoreAnti) const;
191
192 /// Returns true if this edge is intended to be used only for validating the
193 /// schedule.
194 bool isValidationOnly() const { return IsValidationOnly; }
195};
196
197/// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
198/// assume cycle dependencies as the name suggests, such dependencies must be
199/// handled separately. After DAG construction is finished, these dependencies
200/// are added to SwingSchedulerDDG.
201/// TODO: Also handle output-dependencies introduced by physical registers.
205
207
209 auto Ite = OrderDeps.find(Key);
210 if (Ite == OrderDeps.end())
211 return nullptr;
212 return &Ite->second;
213 }
214
215 /// Adds some edges to the original DAG that correspond to loop-carried
216 /// dependencies. Historically, loop-carried edges are represented by using
217 /// non-loop-carried edges in the original DAG. This function appends such
218 /// edges to preserve the previous behavior.
219 LLVM_ABI void modifySUnits(std::vector<SUnit> &SUnits,
220 const TargetInstrInfo *TII);
221
222 LLVM_ABI void dump(SUnit *SU, const TargetRegisterInfo *TRI,
223 const MachineRegisterInfo *MRI) const;
224};
225
226/// This class provides APIs to retrieve edges from/to an SUnit node, with a
227/// particular focus on loop-carried dependencies. Since SUnit is not designed
228/// to represent such edges, handling them directly using its APIs has required
229/// non-trivial logic in the past. This class serves as a wrapper around SUnit,
230/// offering a simpler interface for managing these dependencies.
233
234 struct SwingSchedulerDDGEdges {
235 EdgesType Preds;
236 EdgesType Succs;
237
238 /// This field is a subset of ValidationOnlyEdges. These edges are used only
239 /// by specific heuristics, mainly for cycle detection. Although they are
240 /// unnecessary in theory (i.e., ignoring them should still yield a valid
241 /// schedule), they are retained to preserve the existing behavior. Since we
242 /// only need which extra edges exist from a given SUnit, we only store the
243 /// destination SUnits.
244 SmallVector<SUnit *, 4> ExtraSuccs;
245 };
246
247 void initEdges(SUnit *SU);
248
249 SUnit *EntrySU;
250 SUnit *ExitSU;
251
252 std::vector<SwingSchedulerDDGEdges> EdgesVec;
253 SwingSchedulerDDGEdges EntrySUEdges;
254 SwingSchedulerDDGEdges ExitSUEdges;
255
256 /// Edges that are used only when validating the schedule. These edges are
257 /// not considered to drive the optimization heuristics.
258 SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
259
260 /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
261 /// the ctor.
262 void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
263
264 SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
265 const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
266
267public:
268 LLVM_ABI SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU,
269 SUnit *ExitSU, const LoopCarriedEdges &LCE);
270
271 LLVM_ABI const EdgesType &getInEdges(const SUnit *SU) const;
272
273 LLVM_ABI const EdgesType &getOutEdges(const SUnit *SU) const;
274
276
277 LLVM_ABI bool isValidSchedule(const SMSchedule &Schedule) const;
278};
279
280/// This class builds the dependence graph for the instructions in a loop,
281/// and attempts to schedule the instructions using the SMS algorithm.
283 MachinePipeliner &Pass;
284
285 std::unique_ptr<SwingSchedulerDDG> DDG;
286
287 /// The minimum initiation interval between iterations for this schedule.
288 unsigned MII = 0;
289 /// The maximum initiation interval between iterations for this schedule.
290 unsigned MAX_II = 0;
291 /// Set to true if a valid pipelined schedule is found for the loop.
292 bool Scheduled = false;
293 MachineLoop &Loop;
294 LiveIntervals &LIS;
295 const RegisterClassInfo &RegClassInfo;
296 unsigned II_setByPragma = 0;
297 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
298
299 /// A topological ordering of the SUnits, which is needed for changing
300 /// dependences and iterating over the SUnits.
302
303 struct NodeInfo {
304 int ASAP = 0;
305 int ALAP = 0;
306 int ZeroLatencyDepth = 0;
307 int ZeroLatencyHeight = 0;
308
309 NodeInfo() = default;
310 };
311 /// Computed properties for each node in the graph.
312 std::vector<NodeInfo> ScheduleInfo;
313
314 enum OrderKind { BottomUp = 0, TopDown = 1 };
315 /// Computed node ordering for scheduling.
316 SetVector<SUnit *> NodeOrder;
317
318 using NodeSetType = SmallVector<NodeSet, 8>;
319 using ValueMapTy = DenseMap<unsigned, unsigned>;
320 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
322
323 /// Instructions to change when emitting the final schedule.
325
326 /// We may create a new instruction, so remember it because it
327 /// must be deleted when the pass is finished.
329
330 /// Ordered list of DAG postprocessing steps.
331 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
332
333 /// Used to compute single-iteration dependencies (i.e., buildSchedGraph).
334 AliasAnalysis *AA;
335
336 /// Used to compute loop-carried dependencies (i.e.,
337 /// addLoopCarriedDependences).
338 BatchAAResults BAA;
339
340 /// Helper class to implement Johnson's circuit finding algorithm.
341 class Circuits {
342 std::vector<SUnit> &SUnits;
343 SetVector<SUnit *> Stack;
344 BitVector Blocked;
347 // Node to Index from ScheduleDAGTopologicalSort
348 std::vector<int> *Node2Idx;
349 unsigned NumPaths = 0u;
350 static unsigned MaxPaths;
351
352 public:
353 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
354 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
355 Node2Idx = new std::vector<int>(SUs.size());
356 unsigned Idx = 0;
357 for (const auto &NodeNum : Topo)
358 Node2Idx->at(NodeNum) = Idx++;
359 }
360 Circuits &operator=(const Circuits &other) = delete;
361 Circuits(const Circuits &other) = delete;
362 ~Circuits() { delete Node2Idx; }
363
364 /// Reset the data structures used in the circuit algorithm.
365 void reset() {
366 Stack.clear();
367 Blocked.reset();
368 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
369 NumPaths = 0;
370 }
371
372 LLVM_ABI void createAdjacencyStructure(SwingSchedulerDDG *DDG);
373 LLVM_ABI bool circuit(int V, int S, NodeSetType &NodeSets,
374 const SwingSchedulerDAG *DAG,
375 bool HasBackedge = false);
376 LLVM_ABI void unblock(int U);
377 };
378
379 struct LLVM_ABI CopyToPhiMutation : public ScheduleDAGMutation {
380 void apply(ScheduleDAGInstrs *DAG) override;
381 };
382
383public:
385 const RegisterClassInfo &rci, unsigned II,
387 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
388 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
389 Topo(SUnits, &ExitSU), AA(AA), BAA(*AA) {
390 P.MF->getSubtarget().getSMSMutations(Mutations);
392 Mutations.push_back(std::make_unique<CopyToPhiMutation>());
393 BAA.enableCrossIterationMode();
394 }
395
396 void schedule() override;
397 void finishBlock() override;
398
399 /// Return true if the loop kernel has been scheduled.
400 bool hasNewSchedule() { return Scheduled; }
401
402 /// Return the earliest time an instruction may be scheduled.
403 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
404
405 /// Return the latest time an instruction my be scheduled.
406 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
407
408 /// The mobility function, which the number of slots in which
409 /// an instruction may be scheduled.
410 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
411
412 /// The depth, in the dependence graph, for a node.
413 unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
414
415 /// The maximum unweighted length of a path from an arbitrary node to the
416 /// given node in which each edge has latency 0
418 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
419 }
420
421 /// The height, in the dependence graph, for a node.
422 unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
423
424 /// The maximum unweighted length of a path from the given node to an
425 /// arbitrary node in which each edge has latency 0
427 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
428 }
429
430 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
431
432 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
433
434 /// Return the new base register that was stored away for the changed
435 /// instruction.
438 InstrChanges.find(SU);
439 if (It != InstrChanges.end())
440 return It->second.first;
441 return Register();
442 }
443
444 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
445 Mutations.push_back(std::move(Mutation));
446 }
447
448 static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
449
450 const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
451
452 bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
453 const MachineInstr *OtherMI) const;
454
455private:
456 LoopCarriedEdges addLoopCarriedDependences();
457 void updatePhiDependences();
458 void changeDependences();
459 unsigned calculateResMII();
460 unsigned calculateRecMII(NodeSetType &RecNodeSets);
461 void findCircuits(NodeSetType &NodeSets);
462 void fuseRecs(NodeSetType &NodeSets);
463 void removeDuplicateNodes(NodeSetType &NodeSets);
464 void computeNodeFunctions(NodeSetType &NodeSets);
465 void registerPressureFilter(NodeSetType &NodeSets);
466 void colocateNodeSets(NodeSetType &NodeSets);
467 void checkNodeSets(NodeSetType &NodeSets);
468 void groupRemainingNodes(NodeSetType &NodeSets);
469 void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
470 SetVector<SUnit *> &NodesAdded);
471 void computeNodeOrder(NodeSetType &NodeSets);
472 void checkValidNodeOrder(const NodeSetType &Circuits) const;
473 bool schedulePipeline(SMSchedule &Schedule);
474 bool computeDelta(const MachineInstr &MI, int &Delta) const;
475 MachineInstr *findDefInLoop(Register Reg);
476 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
477 unsigned &OffsetPos, Register &NewBase,
478 int64_t &NewOffset);
479 void postProcessDAG();
480 /// Set the Minimum Initiation Interval for this schedule attempt.
481 void setMII(unsigned ResMII, unsigned RecMII);
482 /// Set the Maximum Initiation Interval for this schedule attempt.
483 void setMAX_II();
484};
485
486/// A NodeSet contains a set of SUnit DAG nodes with additional information
487/// that assigns a priority to the set.
488class NodeSet {
489 SetVector<SUnit *> Nodes;
490 bool HasRecurrence = false;
491 unsigned RecMII = 0;
492 int MaxMOV = 0;
493 unsigned MaxDepth = 0;
494 unsigned Colocate = 0;
495 SUnit *ExceedPressure = nullptr;
496 unsigned Latency = 0;
497
498public:
500
501 NodeSet() = default;
503 : Nodes(S, E), HasRecurrence(true) {
504 // Calculate the latency of this node set.
505 // Example to demonstrate the calculation:
506 // Given: N0 -> N1 -> N2 -> N0
507 // Edges:
508 // (N0 -> N1, 3)
509 // (N0 -> N1, 5)
510 // (N1 -> N2, 2)
511 // (N2 -> N0, 1)
512 // The total latency which is a lower bound of the recurrence MII is the
513 // longest path from N0 back to N0 given only the edges of this node set.
514 // In this example, the latency is: 5 + 2 + 1 = 8.
515 //
516 // Hold a map from each SUnit in the circle to the maximum distance from the
517 // source node by only considering the nodes.
518 const SwingSchedulerDDG *DDG = DAG->getDDG();
519 DenseMap<SUnit *, unsigned> SUnitToDistance;
520 for (auto *Node : Nodes)
521 SUnitToDistance[Node] = 0;
522
523 for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
524 SUnit *U = Nodes[I - 1];
525 SUnit *V = Nodes[I % Nodes.size()];
526 for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
527 SUnit *SuccSUnit = Succ.getDst();
528 if (V != SuccSUnit)
529 continue;
530 unsigned &DU = SUnitToDistance[U];
531 unsigned &DV = SUnitToDistance[V];
532 if (DU + Succ.getLatency() > DV)
533 DV = DU + Succ.getLatency();
534 }
535 }
536 // Handle a back-edge in loop carried dependencies
537 SUnit *FirstNode = Nodes[0];
538 SUnit *LastNode = Nodes[Nodes.size() - 1];
539
540 for (SUnit *SU : DDG->getExtraOutEdges(LastNode)) {
541 // If we have an order dep that is potentially loop carried then a
542 // back-edge exists between the last node and the first node in extra
543 // edges. Handle it manually by adding 1 to the distance of the last node.
544 if (SU != FirstNode)
545 continue;
546 unsigned &First = SUnitToDistance[FirstNode];
547 unsigned Last = SUnitToDistance[LastNode];
548 First = std::max(First, Last + 1);
549 }
550
551 // The latency is the distance from the source node to itself.
552 Latency = SUnitToDistance[Nodes.front()];
553 }
554
555 bool insert(SUnit *SU) { return Nodes.insert(SU); }
556
557 void insert(iterator S, iterator E) { Nodes.insert(S, E); }
558
559 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
560 return Nodes.remove_if(P);
561 }
562
563 unsigned count(SUnit *SU) const { return Nodes.count(SU); }
564
565 bool hasRecurrence() { return HasRecurrence; };
566
567 unsigned size() const { return Nodes.size(); }
568
569 bool empty() const { return Nodes.empty(); }
570
571 SUnit *getNode(unsigned i) const { return Nodes[i]; };
572
573 void setRecMII(unsigned mii) { RecMII = mii; };
574
575 void setColocate(unsigned c) { Colocate = c; };
576
577 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
578
579 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
580
581 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
582
583 int getRecMII() { return RecMII; }
584
585 /// Summarize node functions for the entire node set.
587 for (SUnit *SU : *this) {
588 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
589 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
590 }
591 }
592
593 unsigned getLatency() { return Latency; }
594
595 unsigned getMaxDepth() { return MaxDepth; }
596
597 void clear() {
598 Nodes.clear();
599 RecMII = 0;
600 HasRecurrence = false;
601 MaxMOV = 0;
602 MaxDepth = 0;
603 Colocate = 0;
604 ExceedPressure = nullptr;
605 }
606
607 operator SetVector<SUnit *> &() { return Nodes; }
608
609 /// Sort the node sets by importance. First, rank them by recurrence MII,
610 /// then by mobility (least mobile done first), and finally by depth.
611 /// Each node set may contain a colocate value which is used as the first
612 /// tie breaker, if it's set.
613 bool operator>(const NodeSet &RHS) const {
614 if (RecMII == RHS.RecMII) {
615 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
616 return Colocate < RHS.Colocate;
617 if (MaxMOV == RHS.MaxMOV)
618 return MaxDepth > RHS.MaxDepth;
619 return MaxMOV < RHS.MaxMOV;
620 }
621 return RecMII > RHS.RecMII;
622 }
623
624 bool operator==(const NodeSet &RHS) const {
625 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
626 MaxDepth == RHS.MaxDepth;
627 }
628
629 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
630
631 iterator begin() { return Nodes.begin(); }
632 iterator end() { return Nodes.end(); }
633 LLVM_ABI void print(raw_ostream &os) const;
634
635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
636 LLVM_DUMP_METHOD void dump() const;
637#endif
638};
639
640// 16 was selected based on the number of ProcResource kinds for all
641// existing Subtargets, so that SmallVector don't need to resize too often.
642static const int DefaultProcResSize = 16;
643
645private:
646 const MCSubtargetInfo *STI;
647 const MCSchedModel &SM;
648 const TargetSubtargetInfo *ST;
649 const TargetInstrInfo *TII;
651 const bool UseDFA;
652 /// DFA resources for each slot
654 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle
655 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F)
657 /// The number of scheduled micro operations for each slot. Micro operations
658 /// are assumed to be scheduled one per cycle, starting with the cycle in
659 /// which the instruction is scheduled.
660 llvm::SmallVector<int> NumScheduledMops;
661 /// Each processor resource is associated with a so-called processor resource
662 /// mask. This vector allows to correlate processor resource IDs with
663 /// processor resource masks. There is exactly one element per each processor
664 /// resource declared by the scheduling model.
666 int InitiationInterval = 0;
667 /// The number of micro operations that can be scheduled at a cycle.
668 int IssueWidth;
669
670 int calculateResMIIDFA() const;
671 /// Check if MRT is overbooked
672 bool isOverbooked() const;
673 /// Reserve resources on MRT
674 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
675 /// Unreserve resources on MRT
676 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle);
677
678 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor.
679 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C,
680 /// II).
681 int positiveModulo(int Dividend, int Divisor) const {
682 assert(Divisor > 0);
683 int R = Dividend % Divisor;
684 if (R < 0)
685 R += Divisor;
686 return R;
687 }
688
689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
690 LLVM_DUMP_METHOD void dumpMRT() const;
691#endif
692
693public:
695 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()),
696 DAG(DAG), UseDFA(ST->useDFAforSMS()),
697 ProcResourceMasks(SM.getNumProcResourceKinds(), 0),
698 IssueWidth(SM.IssueWidth) {
699 initProcResourceVectors(SM, ProcResourceMasks);
700 if (IssueWidth <= 0)
701 // If IssueWidth is not specified, set a sufficiently large value
702 IssueWidth = 100;
703 if (SwpForceIssueWidth > 0)
704 IssueWidth = SwpForceIssueWidth;
705 }
706
707 LLVM_ABI void initProcResourceVectors(const MCSchedModel &SM,
709
710 /// Check if the resources occupied by a machine instruction are available
711 /// in the current state.
712 LLVM_ABI bool canReserveResources(SUnit &SU, int Cycle);
713
714 /// Reserve the resources occupied by a machine instruction and change the
715 /// current state to reflect that change.
716 LLVM_ABI void reserveResources(SUnit &SU, int Cycle);
717
718 LLVM_ABI int calculateResMII() const;
719
720 /// Initialize resources with the initiation interval II.
721 LLVM_ABI void init(int II);
722};
723
724/// This class represents the scheduled code. The main data structure is a
725/// map from scheduled cycle to instructions. During scheduling, the
726/// data structure explicitly represents all stages/iterations. When
727/// the algorithm finshes, the schedule is collapsed into a single stage,
728/// which represents instructions from different loop iterations.
729///
730/// The SMS algorithm allows negative values for cycles, so the first cycle
731/// in the schedule is the smallest cycle value.
733private:
734 /// Map from execution cycle to instructions.
735 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
736
737 /// Map from instruction to execution cycle.
738 std::map<SUnit *, int> InstrToCycle;
739
740 /// Keep track of the first cycle value in the schedule. It starts
741 /// as zero, but the algorithm allows negative values.
742 int FirstCycle = 0;
743
744 /// Keep track of the last cycle value in the schedule.
745 int LastCycle = 0;
746
747 /// The initiation interval (II) for the schedule.
748 int InitiationInterval = 0;
749
750 /// Target machine information.
751 const TargetSubtargetInfo &ST;
752
753 /// Virtual register information.
755
756 ResourceManager ProcItinResources;
757
758public:
760 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
761 ProcItinResources(&ST, DAG) {}
762
763 void reset() {
764 ScheduledInstrs.clear();
765 InstrToCycle.clear();
766 FirstCycle = 0;
767 LastCycle = 0;
768 InitiationInterval = 0;
769 }
770
771 /// Set the initiation interval for this schedule.
773 InitiationInterval = ii;
774 ProcItinResources.init(ii);
775 }
776
777 /// Return the initiation interval for this schedule.
778 int getInitiationInterval() const { return InitiationInterval; }
779
780 /// Return the first cycle in the completed schedule. This
781 /// can be a negative value.
782 int getFirstCycle() const { return FirstCycle; }
783
784 /// Return the last cycle in the finalized schedule.
785 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
786
787 LLVM_ABI void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
788 int II, SwingSchedulerDAG *DAG);
789 LLVM_ABI bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
790
791 /// Iterators for the cycle to instruction map.
795
796 /// Return true if the instruction is scheduled at the specified stage.
797 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
798 return (stageScheduled(SU) == (int)StageNum);
799 }
800
801 /// Return the stage for a scheduled instruction. Return -1 if
802 /// the instruction has not been scheduled.
803 int stageScheduled(SUnit *SU) const {
804 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
805 if (it == InstrToCycle.end())
806 return -1;
807 return (it->second - FirstCycle) / InitiationInterval;
808 }
809
810 /// Return the cycle for a scheduled instruction. This function normalizes
811 /// the first cycle to be 0.
812 unsigned cycleScheduled(SUnit *SU) const {
813 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
814 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
815 return (it->second - FirstCycle) % InitiationInterval;
816 }
817
818 /// Return the maximum stage count needed for this schedule.
819 unsigned getMaxStageCount() {
820 return (LastCycle - FirstCycle) / InitiationInterval;
821 }
822
823 /// Return the instructions that are scheduled at the specified cycle.
824 std::deque<SUnit *> &getInstructions(int cycle) {
825 return ScheduledInstrs[cycle];
826 }
827
829 computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
831
832 LLVM_ABI std::deque<SUnit *>
833 reorderInstructions(const SwingSchedulerDAG *SSD,
834 const std::deque<SUnit *> &Instrs) const;
835
836 LLVM_ABI bool
837 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
839 LLVM_ABI bool isValidSchedule(SwingSchedulerDAG *SSD);
840 LLVM_ABI void finalizeSchedule(SwingSchedulerDAG *SSD);
841 LLVM_ABI void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
842 std::deque<SUnit *> &Insts) const;
843 LLVM_ABI bool isLoopCarried(const SwingSchedulerDAG *SSD,
844 MachineInstr &Phi) const;
845 LLVM_ABI bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
846 MachineInstr *Def,
847 MachineOperand &MO) const;
848
849 LLVM_ABI bool
850 onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
851 const SwingSchedulerDDG *DDG) const;
852 LLVM_ABI void print(raw_ostream &os) const;
853 LLVM_ABI void dump() const;
854};
855
856} // end namespace llvm
857
858#endif // LLVM_CODEGEN_MACHINEPIPELINER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr unsigned SM(unsigned Version)
uint64_t IntrinsicInst * II
#define P(N)
PowerPC VSX FMA Mutation
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
Value * RHS
Represent the analysis usage information of a pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
iterator end()
Definition DenseMap.h:85
Itinerary data supplied by a subtarget to be used by a target.
Generic base class for all target subtargets.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
The main class in the implementation of the target independent software pipeliner pass.
const TargetInstrInfo * TII
const MachineDominatorTree * MDT
const MachineLoopInfo * MLI
MachineOptimizationRemarkEmitter * ORE
RegisterClassInfo RegClassInfo
const InstrItineraryData * InstrItins
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
SUnit * getNode(unsigned i) const
SetVector< SUnit * >::const_iterator iterator
bool isExceedSU(SUnit *SU)
void insert(iterator S, iterator E)
void setRecMII(unsigned mii)
void computeNodeSetInfo(SwingSchedulerDAG *SSD)
Summarize node functions for the entire node set.
unsigned getMaxDepth()
unsigned count(SUnit *SU) const
NodeSet()=default
void setColocate(unsigned c)
unsigned getLatency()
NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
bool operator>(const NodeSet &RHS) const
Sort the node sets by importance.
int compareRecMII(NodeSet &RHS)
unsigned size() const
bool operator!=(const NodeSet &RHS) const
bool insert(SUnit *SU)
bool operator==(const NodeSet &RHS) const
bool remove_if(UnaryPredicate P)
bool empty() const
void setExceedPressure(SUnit *SU)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
LLVM_ABI void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
ResourceManager(const TargetSubtargetInfo *ST, ScheduleDAGInstrs *DAG)
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
@ Output
A register output-dependence (aka WAW).
Definition ScheduleDAG.h:57
@ Order
Any other ordering dependency.
Definition ScheduleDAG.h:58
@ Anti
A register anti-dependence (aka WAR).
Definition ScheduleDAG.h:56
This class represents the scheduled code.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
int getInitiationInterval() const
Return the initiation interval for this schedule.
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
int getFirstCycle() const
Return the first cycle in the completed schedule.
DenseMap< int, std::deque< SUnit * > >::const_iterator const_sched_iterator
DenseMap< int, std::deque< SUnit * > >::iterator sched_iterator
Iterators for the cycle to instruction map.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG)
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Scheduling unit. This is a node in the scheduling DAG.
A ScheduleDAG for scheduling lists of MachineInstr.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
const MachineLoopInfo * MLI
Mutate the DAG as a postpass after normal DAG building.
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
std::vector< SUnit > SUnits
The scheduling units.
MachineFunction & MF
Machine function.
ScheduleDAG & operator=(const ScheduleDAG &)=delete
SUnit ExitSU
Special node for the region exit.
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
typename vector_type::const_iterator const_iterator
Definition SetVector.h:73
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
const SwingSchedulerDDG * getDDG() const
bool hasNewSchedule()
Return true if the loop kernel has been scheduled.
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, const RegisterClassInfo &rci, unsigned II, TargetInstrInfo::PipelinerLoopInfo *PLI, AliasAnalysis *AA)
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Register getInstrBaseReg(SUnit *SU) const
Return the new base register that was stored away for the changed instruction.
static bool classof(const ScheduleDAGInstrs *DAG)
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Represents a dependence between two instruction.
SUnit * getDst() const
Returns the SUnit to which the edge points (destination node).
Register getReg() const
Returns the register associated with the edge.
void setDistance(unsigned D)
Sets the distance value for the edge.
bool isBarrier() const
Returns true if the edge represents unknown scheduling barrier.
void setLatency(unsigned Latency)
Sets the latency for the edge.
SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc, bool IsValidationOnly)
Creates an edge corresponding to an edge represented by PredOrSucc and Dep in the original DAG.
bool isAntiDep() const
Returns true if the edge represents anti dependence.
bool isAssignedRegDep() const
Tests if this is a Data dependence that is associated with a register.
bool isArtificial() const
Returns true if the edge represents an artificial dependence.
LLVM_ABI bool ignoreDependence(bool IgnoreAnti) const
Returns true for DDG nodes that we ignore when computing the cost functions.
bool isOrderDep() const
Returns true if the edge represents a dependence that is not data, anti or output dependence.
unsigned getLatency() const
Returns the latency value for the edge.
SUnit * getSrc() const
Returns the SUnit from which the edge comes (source node).
bool isValidationOnly() const
Returns true if this edge is intended to be used only for validating the schedule.
unsigned getDistance() const
Returns the distance value for the edge.
bool isOutputDep() const
Returns true if the edge represents output dependence.
This class provides APIs to retrieve edges from/to an SUnit node, with a particular focus on loop-car...
LLVM_ABI SwingSchedulerDDG(std::vector< SUnit > &SUnits, SUnit *EntrySU, SUnit *ExitSU, const LoopCarriedEdges &LCE)
LLVM_ABI ArrayRef< SUnit * > getExtraOutEdges(const SUnit *SU) const
LLVM_ABI const EdgesType & getInEdges(const SUnit *SU) const
LLVM_ABI bool isValidSchedule(const SMSchedule &Schedule) const
Check if Schedule doesn't violate the validation-only dependencies.
LLVM_ABI const EdgesType & getOutEdges(const SUnit *SU) const
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI cl::opt< bool > SwpEnableCopyToPhi
LLVM_ABI cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
static const int DefaultProcResSize
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
Represents loop-carried dependencies.
SmallSetVector< SUnit *, 8 > OrderDep
const OrderDep * getOrderDepOrNull(SUnit *Key) const
LLVM_ABI void modifySUnits(std::vector< SUnit > &SUnits, const TargetInstrInfo *TII)
Adds some edges to the original DAG that correspond to loop-carried dependencies.
LLVM_ABI void dump(SUnit *SU, const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) const
DenseMap< SUnit *, OrderDep > OrderDepsType
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition MCSchedule.h:129
Machine model for scheduling, bundling, and heuristics.
Definition MCSchedule.h:264
Cache the target analysis information about the loop.
SmallVector< MachineOperand, 4 > BrCond
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo