LLVM  8.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
11 //
12 // Software pipelining (SWP) is an instruction scheduling technique for loops
13 // that overlap loop iterations and exploits ILP via a compiler transformation.
14 //
15 // Swing Modulo Scheduling is an implementation of software pipelining
16 // that generates schedules that are near optimal in terms of initiation
17 // interval, register requirements, and stage count. See the papers:
18 //
19 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
20 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
21 // Conference on Parallel Architectures and Compilation Techiniques.
22 //
23 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
24 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
25 // Transactions on Computers, Vol. 50, No. 3, 2001.
26 //
27 // "An Implementation of Swing Modulo Scheduling With Extensions for
28 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
29 // Urbana-Chambpain, 2005.
30 //
31 //
32 // The SMS algorithm consists of three main steps after computing the minimal
33 // initiation interval (MII).
34 // 1) Analyze the dependence graph and compute information about each
35 // instruction in the graph.
36 // 2) Order the nodes (instructions) by priority based upon the heuristics
37 // described in the algorithm.
38 // 3) Attempt to schedule the nodes in the specified order using the MII.
39 //
40 // This SMS implementation is a target-independent back-end pass. When enabled,
41 // the pass runs just prior to the register allocation pass, while the machine
42 // IR is in SSA form. If software pipelining is successful, then the original
43 // loop is replaced by the optimized loop. The optimized loop contains one or
44 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
45 // the instructions cannot be scheduled in a given MII, we increase the MII by
46 // one and try again.
47 //
48 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
49 // represent loop carried dependences in the DAG as order edges to the Phi
50 // nodes. We also perform several passes over the DAG to eliminate unnecessary
51 // edges that inhibit the ability to pipeline. The implementation uses the
52 // DFAPacketizer class to compute the minimum initiation interval and the check
53 // where an instruction may be inserted in the pipelined schedule.
54 //
55 // In order for the SMS pass to work, several target specific hooks need to be
56 // implemented to get information about the loop structure and to rewrite
57 // instructions.
58 //
59 //===----------------------------------------------------------------------===//
60 
61 #include "llvm/ADT/ArrayRef.h"
62 #include "llvm/ADT/BitVector.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/MapVector.h"
65 #include "llvm/ADT/PriorityQueue.h"
66 #include "llvm/ADT/SetVector.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallSet.h"
69 #include "llvm/ADT/SmallVector.h"
70 #include "llvm/ADT/Statistic.h"
96 #include "llvm/Config/llvm-config.h"
97 #include "llvm/IR/Attributes.h"
98 #include "llvm/IR/DebugLoc.h"
99 #include "llvm/IR/Function.h"
100 #include "llvm/MC/LaneBitmask.h"
101 #include "llvm/MC/MCInstrDesc.h"
103 #include "llvm/MC/MCRegisterInfo.h"
104 #include "llvm/Pass.h"
106 #include "llvm/Support/Compiler.h"
107 #include "llvm/Support/Debug.h"
108 #include "llvm/Support/MathExtras.h"
110 #include <algorithm>
111 #include <cassert>
112 #include <climits>
113 #include <cstdint>
114 #include <deque>
115 #include <functional>
116 #include <iterator>
117 #include <map>
118 #include <memory>
119 #include <tuple>
120 #include <utility>
121 #include <vector>
122 
123 using namespace llvm;
124 
125 #define DEBUG_TYPE "pipeliner"
126 
127 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
128 STATISTIC(NumPipelined, "Number of loops software pipelined");
129 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
130 
131 /// A command line option to turn software pipelining on or off.
132 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
134  cl::desc("Enable Software Pipelining"));
135 
136 /// A command line option to enable SWP at -Os.
137 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
138  cl::desc("Enable SWP at Os."), cl::Hidden,
139  cl::init(false));
140 
141 /// A command line argument to limit minimum initial interval for pipelining.
142 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
143  cl::desc("Size limit for the MII."),
144  cl::Hidden, cl::init(27));
145 
146 /// A command line argument to limit the number of stages in the pipeline.
147 static cl::opt<int>
148  SwpMaxStages("pipeliner-max-stages",
149  cl::desc("Maximum stages allowed in the generated scheduled."),
150  cl::Hidden, cl::init(3));
151 
152 /// A command line option to disable the pruning of chain dependences due to
153 /// an unrelated Phi.
154 static cl::opt<bool>
155  SwpPruneDeps("pipeliner-prune-deps",
156  cl::desc("Prune dependences between unrelated Phi nodes."),
157  cl::Hidden, cl::init(true));
158 
159 /// A command line option to disable the pruning of loop carried order
160 /// dependences.
161 static cl::opt<bool>
162  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
163  cl::desc("Prune loop carried order dependences."),
164  cl::Hidden, cl::init(true));
165 
166 #ifndef NDEBUG
167 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
168 #endif
169 
170 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
171  cl::ReallyHidden, cl::init(false),
172  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
173 
174 // A command line option to enable the CopyToPhi DAG mutation.
175 static cl::opt<bool>
176  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
177  cl::init(true), cl::ZeroOrMore,
178  cl::desc("Enable CopyToPhi DAG Mutation"));
179 
180 namespace {
181 
182 class NodeSet;
183 class SMSchedule;
184 
185 /// The main class in the implementation of the target independent
186 /// software pipeliner pass.
187 class MachinePipeliner : public MachineFunctionPass {
188 public:
189  MachineFunction *MF = nullptr;
190  const MachineLoopInfo *MLI = nullptr;
191  const MachineDominatorTree *MDT = nullptr;
192  const InstrItineraryData *InstrItins;
193  const TargetInstrInfo *TII = nullptr;
194  RegisterClassInfo RegClassInfo;
195 
196 #ifndef NDEBUG
197  static int NumTries;
198 #endif
199 
200  /// Cache the target analysis information about the loop.
201  struct LoopInfo {
202  MachineBasicBlock *TBB = nullptr;
203  MachineBasicBlock *FBB = nullptr;
205  MachineInstr *LoopInductionVar = nullptr;
206  MachineInstr *LoopCompare = nullptr;
207  };
208  LoopInfo LI;
209 
210  static char ID;
211 
212  MachinePipeliner() : MachineFunctionPass(ID) {
214  }
215 
216  bool runOnMachineFunction(MachineFunction &MF) override;
217 
218  void getAnalysisUsage(AnalysisUsage &AU) const override {
225  }
226 
227 private:
228  void preprocessPhiNodes(MachineBasicBlock &B);
229  bool canPipelineLoop(MachineLoop &L);
230  bool scheduleLoop(MachineLoop &L);
231  bool swingModuloScheduler(MachineLoop &L);
232 };
233 
234 /// This class builds the dependence graph for the instructions in a loop,
235 /// and attempts to schedule the instructions using the SMS algorithm.
236 class SwingSchedulerDAG : public ScheduleDAGInstrs {
237  MachinePipeliner &Pass;
238  /// The minimum initiation interval between iterations for this schedule.
239  unsigned MII = 0;
240  /// Set to true if a valid pipelined schedule is found for the loop.
241  bool Scheduled = false;
242  MachineLoop &Loop;
243  LiveIntervals &LIS;
244  const RegisterClassInfo &RegClassInfo;
245 
246  /// A toplogical ordering of the SUnits, which is needed for changing
247  /// dependences and iterating over the SUnits.
249 
250  struct NodeInfo {
251  int ASAP = 0;
252  int ALAP = 0;
253  int ZeroLatencyDepth = 0;
254  int ZeroLatencyHeight = 0;
255 
256  NodeInfo() = default;
257  };
258  /// Computed properties for each node in the graph.
259  std::vector<NodeInfo> ScheduleInfo;
260 
261  enum OrderKind { BottomUp = 0, TopDown = 1 };
262  /// Computed node ordering for scheduling.
264 
265  using NodeSetType = SmallVector<NodeSet, 8>;
266  using ValueMapTy = DenseMap<unsigned, unsigned>;
267  using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
268  using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
269 
270  /// Instructions to change when emitting the final schedule.
272 
273  /// We may create a new instruction, so remember it because it
274  /// must be deleted when the pass is finished.
276 
277  /// Ordered list of DAG postprocessing steps.
278  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
279 
280  /// Helper class to implement Johnson's circuit finding algorithm.
281  class Circuits {
282  std::vector<SUnit> &SUnits;
283  SetVector<SUnit *> Stack;
284  BitVector Blocked;
287  // Node to Index from ScheduleDAGTopologicalSort
288  std::vector<int> *Node2Idx;
289  unsigned NumPaths;
290  static unsigned MaxPaths;
291 
292  public:
293  Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo)
294  : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) {
295  Node2Idx = new std::vector<int>(SUs.size());
296  unsigned Idx = 0;
297  for (const auto &NodeNum : Topo)
298  Node2Idx->at(NodeNum) = Idx++;
299  }
300 
301  ~Circuits() { delete Node2Idx; }
302 
303  /// Reset the data structures used in the circuit algorithm.
304  void reset() {
305  Stack.clear();
306  Blocked.reset();
307  B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
308  NumPaths = 0;
309  }
310 
311  void createAdjacencyStructure(SwingSchedulerDAG *DAG);
312  bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
313  void unblock(int U);
314  };
315 
316  struct CopyToPhiMutation : public ScheduleDAGMutation {
317  void apply(ScheduleDAGInstrs *DAG) override;
318  };
319 
320 public:
321  SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
322  const RegisterClassInfo &rci)
323  : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
324  RegClassInfo(rci), Topo(SUnits, &ExitSU) {
325  P.MF->getSubtarget().getSMSMutations(Mutations);
326  if (SwpEnableCopyToPhi)
327  Mutations.push_back(llvm::make_unique<CopyToPhiMutation>());
328  }
329 
330  void schedule() override;
331  void finishBlock() override;
332 
333  /// Return true if the loop kernel has been scheduled.
334  bool hasNewSchedule() { return Scheduled; }
335 
336  /// Return the earliest time an instruction may be scheduled.
337  int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
338 
339  /// Return the latest time an instruction my be scheduled.
340  int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
341 
342  /// The mobility function, which the number of slots in which
343  /// an instruction may be scheduled.
344  int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
345 
346  /// The depth, in the dependence graph, for a node.
347  unsigned getDepth(SUnit *Node) { return Node->getDepth(); }
348 
349  /// The maximum unweighted length of a path from an arbitrary node to the
350  /// given node in which each edge has latency 0
351  int getZeroLatencyDepth(SUnit *Node) {
352  return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth;
353  }
354 
355  /// The height, in the dependence graph, for a node.
356  unsigned getHeight(SUnit *Node) { return Node->getHeight(); }
357 
358  /// The maximum unweighted length of a path from the given node to an
359  /// arbitrary node in which each edge has latency 0
360  int getZeroLatencyHeight(SUnit *Node) {
361  return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
362  }
363 
364  /// Return true if the dependence is a back-edge in the data dependence graph.
365  /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
366  /// using an anti dependence from a Phi to an instruction.
367  bool isBackedge(SUnit *Source, const SDep &Dep) {
368  if (Dep.getKind() != SDep::Anti)
369  return false;
370  return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
371  }
372 
373  bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true);
374 
375  /// The distance function, which indicates that operation V of iteration I
376  /// depends on operations U of iteration I-distance.
377  unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
378  // Instructions that feed a Phi have a distance of 1. Computing larger
379  // values for arrays requires data dependence information.
380  if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
381  return 1;
382  return 0;
383  }
384 
385  /// Set the Minimum Initiation Interval for this schedule attempt.
386  void setMII(unsigned mii) { MII = mii; }
387 
388  void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
389 
390  void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
391 
392  /// Return the new base register that was stored away for the changed
393  /// instruction.
394  unsigned getInstrBaseReg(SUnit *SU) {
396  InstrChanges.find(SU);
397  if (It != InstrChanges.end())
398  return It->second.first;
399  return 0;
400  }
401 
402  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
403  Mutations.push_back(std::move(Mutation));
404  }
405 
406  static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
407 
408 private:
409  void addLoopCarriedDependences(AliasAnalysis *AA);
410  void updatePhiDependences();
411  void changeDependences();
412  unsigned calculateResMII();
413  unsigned calculateRecMII(NodeSetType &RecNodeSets);
414  void findCircuits(NodeSetType &NodeSets);
415  void fuseRecs(NodeSetType &NodeSets);
416  void removeDuplicateNodes(NodeSetType &NodeSets);
417  void computeNodeFunctions(NodeSetType &NodeSets);
418  void registerPressureFilter(NodeSetType &NodeSets);
419  void colocateNodeSets(NodeSetType &NodeSets);
420  void checkNodeSets(NodeSetType &NodeSets);
421  void groupRemainingNodes(NodeSetType &NodeSets);
422  void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
423  SetVector<SUnit *> &NodesAdded);
424  void computeNodeOrder(NodeSetType &NodeSets);
425  void checkValidNodeOrder(const NodeSetType &Circuits) const;
426  bool schedulePipeline(SMSchedule &Schedule);
427  void generatePipelinedLoop(SMSchedule &Schedule);
428  void generateProlog(SMSchedule &Schedule, unsigned LastStage,
429  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
430  MBBVectorTy &PrologBBs);
431  void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
432  MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
433  MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
434  void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
435  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
436  SMSchedule &Schedule, ValueMapTy *VRMap,
437  InstrMapTy &InstrMap, unsigned LastStageNum,
438  unsigned CurStageNum, bool IsLast);
439  void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
440  MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
441  SMSchedule &Schedule, ValueMapTy *VRMap,
442  InstrMapTy &InstrMap, unsigned LastStageNum,
443  unsigned CurStageNum, bool IsLast);
444  void removeDeadInstructions(MachineBasicBlock *KernelBB,
445  MBBVectorTy &EpilogBBs);
446  void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
447  SMSchedule &Schedule);
448  void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
449  MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
450  ValueMapTy *VRMap);
451  bool computeDelta(MachineInstr &MI, unsigned &Delta);
452  void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
453  unsigned Num);
454  MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
455  unsigned InstStageNum);
456  MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
457  unsigned InstStageNum,
458  SMSchedule &Schedule);
459  void updateInstruction(MachineInstr *NewMI, bool LastDef,
460  unsigned CurStageNum, unsigned InstrStageNum,
461  SMSchedule &Schedule, ValueMapTy *VRMap);
462  MachineInstr *findDefInLoop(unsigned Reg);
463  unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
464  unsigned LoopStage, ValueMapTy *VRMap,
465  MachineBasicBlock *BB);
466  void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
467  SMSchedule &Schedule, ValueMapTy *VRMap,
468  InstrMapTy &InstrMap);
469  void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
470  InstrMapTy &InstrMap, unsigned CurStageNum,
471  unsigned PhiNum, MachineInstr *Phi,
472  unsigned OldReg, unsigned NewReg,
473  unsigned PrevReg = 0);
474  bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
475  unsigned &OffsetPos, unsigned &NewBase,
476  int64_t &NewOffset);
477  void postprocessDAG();
478 };
479 
480 /// A NodeSet contains a set of SUnit DAG nodes with additional information
481 /// that assigns a priority to the set.
482 class NodeSet {
483  SetVector<SUnit *> Nodes;
484  bool HasRecurrence = false;
485  unsigned RecMII = 0;
486  int MaxMOV = 0;
487  unsigned MaxDepth = 0;
488  unsigned Colocate = 0;
489  SUnit *ExceedPressure = nullptr;
490  unsigned Latency = 0;
491 
492 public:
493  using iterator = SetVector<SUnit *>::const_iterator;
494 
495  NodeSet() = default;
496  NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {
497  Latency = 0;
498  for (unsigned i = 0, e = Nodes.size(); i < e; ++i)
499  for (const SDep &Succ : Nodes[i]->Succs)
500  if (Nodes.count(Succ.getSUnit()))
501  Latency += Succ.getLatency();
502  }
503 
504  bool insert(SUnit *SU) { return Nodes.insert(SU); }
505 
506  void insert(iterator S, iterator E) { Nodes.insert(S, E); }
507 
508  template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
509  return Nodes.remove_if(P);
510  }
511 
512  unsigned count(SUnit *SU) const { return Nodes.count(SU); }
513 
514  bool hasRecurrence() { return HasRecurrence; };
515 
516  unsigned size() const { return Nodes.size(); }
517 
518  bool empty() const { return Nodes.empty(); }
519 
520  SUnit *getNode(unsigned i) const { return Nodes[i]; };
521 
522  void setRecMII(unsigned mii) { RecMII = mii; };
523 
524  void setColocate(unsigned c) { Colocate = c; };
525 
526  void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
527 
528  bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
529 
530  int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
531 
532  int getRecMII() { return RecMII; }
533 
534  /// Summarize node functions for the entire node set.
535  void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
536  for (SUnit *SU : *this) {
537  MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
538  MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
539  }
540  }
541 
542  unsigned getLatency() { return Latency; }
543 
544  unsigned getMaxDepth() { return MaxDepth; }
545 
546  void clear() {
547  Nodes.clear();
548  RecMII = 0;
549  HasRecurrence = false;
550  MaxMOV = 0;
551  MaxDepth = 0;
552  Colocate = 0;
553  ExceedPressure = nullptr;
554  }
555 
556  operator SetVector<SUnit *> &() { return Nodes; }
557 
558  /// Sort the node sets by importance. First, rank them by recurrence MII,
559  /// then by mobility (least mobile done first), and finally by depth.
560  /// Each node set may contain a colocate value which is used as the first
561  /// tie breaker, if it's set.
562  bool operator>(const NodeSet &RHS) const {
563  if (RecMII == RHS.RecMII) {
564  if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
565  return Colocate < RHS.Colocate;
566  if (MaxMOV == RHS.MaxMOV)
567  return MaxDepth > RHS.MaxDepth;
568  return MaxMOV < RHS.MaxMOV;
569  }
570  return RecMII > RHS.RecMII;
571  }
572 
573  bool operator==(const NodeSet &RHS) const {
574  return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
575  MaxDepth == RHS.MaxDepth;
576  }
577 
578  bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
579 
580  iterator begin() { return Nodes.begin(); }
581  iterator end() { return Nodes.end(); }
582 
583  void print(raw_ostream &os) const {
584  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
585  << " depth " << MaxDepth << " col " << Colocate << "\n";
586  for (const auto &I : Nodes)
587  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
588  os << "\n";
589  }
590 
591 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
592  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
593 #endif
594 };
595 
596 /// This class represents the scheduled code. The main data structure is a
597 /// map from scheduled cycle to instructions. During scheduling, the
598 /// data structure explicitly represents all stages/iterations. When
599 /// the algorithm finshes, the schedule is collapsed into a single stage,
600 /// which represents instructions from different loop iterations.
601 ///
602 /// The SMS algorithm allows negative values for cycles, so the first cycle
603 /// in the schedule is the smallest cycle value.
604 class SMSchedule {
605 private:
606  /// Map from execution cycle to instructions.
607  DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
608 
609  /// Map from instruction to execution cycle.
610  std::map<SUnit *, int> InstrToCycle;
611 
612  /// Map for each register and the max difference between its uses and def.
613  /// The first element in the pair is the max difference in stages. The
614  /// second is true if the register defines a Phi value and loop value is
615  /// scheduled before the Phi.
616  std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
617 
618  /// Keep track of the first cycle value in the schedule. It starts
619  /// as zero, but the algorithm allows negative values.
620  int FirstCycle = 0;
621 
622  /// Keep track of the last cycle value in the schedule.
623  int LastCycle = 0;
624 
625  /// The initiation interval (II) for the schedule.
626  int InitiationInterval = 0;
627 
628  /// Target machine information.
629  const TargetSubtargetInfo &ST;
630 
631  /// Virtual register information.
633 
634  std::unique_ptr<DFAPacketizer> Resources;
635 
636 public:
637  SMSchedule(MachineFunction *mf)
638  : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
639  Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
640 
641  void reset() {
642  ScheduledInstrs.clear();
643  InstrToCycle.clear();
644  RegToStageDiff.clear();
645  FirstCycle = 0;
646  LastCycle = 0;
647  InitiationInterval = 0;
648  }
649 
650  /// Set the initiation interval for this schedule.
651  void setInitiationInterval(int ii) { InitiationInterval = ii; }
652 
653  /// Return the first cycle in the completed schedule. This
654  /// can be a negative value.
655  int getFirstCycle() const { return FirstCycle; }
656 
657  /// Return the last cycle in the finalized schedule.
658  int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
659 
660  /// Return the cycle of the earliest scheduled instruction in the dependence
661  /// chain.
662  int earliestCycleInChain(const SDep &Dep);
663 
664  /// Return the cycle of the latest scheduled instruction in the dependence
665  /// chain.
666  int latestCycleInChain(const SDep &Dep);
667 
668  void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
669  int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
670  bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
671 
672  /// Iterators for the cycle to instruction map.
673  using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator;
674  using const_sched_iterator =
675  DenseMap<int, std::deque<SUnit *>>::const_iterator;
676 
677  /// Return true if the instruction is scheduled at the specified stage.
678  bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
679  return (stageScheduled(SU) == (int)StageNum);
680  }
681 
682  /// Return the stage for a scheduled instruction. Return -1 if
683  /// the instruction has not been scheduled.
684  int stageScheduled(SUnit *SU) const {
685  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
686  if (it == InstrToCycle.end())
687  return -1;
688  return (it->second - FirstCycle) / InitiationInterval;
689  }
690 
691  /// Return the cycle for a scheduled instruction. This function normalizes
692  /// the first cycle to be 0.
693  unsigned cycleScheduled(SUnit *SU) const {
694  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
695  assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
696  return (it->second - FirstCycle) % InitiationInterval;
697  }
698 
699  /// Return the maximum stage count needed for this schedule.
700  unsigned getMaxStageCount() {
701  return (LastCycle - FirstCycle) / InitiationInterval;
702  }
703 
704  /// Return the max. number of stages/iterations that can occur between a
705  /// register definition and its uses.
706  unsigned getStagesForReg(int Reg, unsigned CurStage) {
707  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
708  if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
709  return 1;
710  return Stages.first;
711  }
712 
713  /// The number of stages for a Phi is a little different than other
714  /// instructions. The minimum value computed in RegToStageDiff is 1
715  /// because we assume the Phi is needed for at least 1 iteration.
716  /// This is not the case if the loop value is scheduled prior to the
717  /// Phi in the same stage. This function returns the number of stages
718  /// or iterations needed between the Phi definition and any uses.
719  unsigned getStagesForPhi(int Reg) {
720  std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
721  if (Stages.second)
722  return Stages.first;
723  return Stages.first - 1;
724  }
725 
726  /// Return the instructions that are scheduled at the specified cycle.
727  std::deque<SUnit *> &getInstructions(int cycle) {
728  return ScheduledInstrs[cycle];
729  }
730 
731  bool isValidSchedule(SwingSchedulerDAG *SSD);
732  void finalizeSchedule(SwingSchedulerDAG *SSD);
733  void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
734  std::deque<SUnit *> &Insts);
735  bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
736  bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def,
737  MachineOperand &MO);
738  void print(raw_ostream &os) const;
739  void dump() const;
740 };
741 
742 } // end anonymous namespace
743 
744 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
745 char MachinePipeliner::ID = 0;
746 #ifndef NDEBUG
747 int MachinePipeliner::NumTries = 0;
748 #endif
750 
751 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
752  "Modulo Software Pipelining", false, false)
757 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
758  "Modulo Software Pipelining", false, false)
759 
760 /// The "main" function for implementing Swing Modulo Scheduling.
761 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
762  if (skipFunction(mf.getFunction()))
763  return false;
764 
765  if (!EnableSWP)
766  return false;
767 
768  if (mf.getFunction().getAttributes().hasAttribute(
769  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
770  !EnableSWPOptSize.getPosition())
771  return false;
772 
773  MF = &mf;
774  MLI = &getAnalysis<MachineLoopInfo>();
775  MDT = &getAnalysis<MachineDominatorTree>();
776  TII = MF->getSubtarget().getInstrInfo();
777  RegClassInfo.runOnMachineFunction(*MF);
778 
779  for (auto &L : *MLI)
780  scheduleLoop(*L);
781 
782  return false;
783 }
784 
785 /// Attempt to perform the SMS algorithm on the specified loop. This function is
786 /// the main entry point for the algorithm. The function identifies candidate
787 /// loops, calculates the minimum initiation interval, and attempts to schedule
788 /// the loop.
789 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
790  bool Changed = false;
791  for (auto &InnerLoop : L)
792  Changed |= scheduleLoop(*InnerLoop);
793 
794 #ifndef NDEBUG
795  // Stop trying after reaching the limit (if any).
796  int Limit = SwpLoopLimit;
797  if (Limit >= 0) {
798  if (NumTries >= SwpLoopLimit)
799  return Changed;
800  NumTries++;
801  }
802 #endif
803 
804  if (!canPipelineLoop(L))
805  return Changed;
806 
807  ++NumTrytoPipeline;
808 
809  Changed = swingModuloScheduler(L);
810 
811  return Changed;
812 }
813 
814 /// Return true if the loop can be software pipelined. The algorithm is
815 /// restricted to loops with a single basic block. Make sure that the
816 /// branch in the loop can be analyzed.
817 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
818  if (L.getNumBlocks() != 1)
819  return false;
820 
821  // Check if the branch can't be understood because we can't do pipelining
822  // if that's the case.
823  LI.TBB = nullptr;
824  LI.FBB = nullptr;
825  LI.BrCond.clear();
826  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
827  return false;
828 
829  LI.LoopInductionVar = nullptr;
830  LI.LoopCompare = nullptr;
831  if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
832  return false;
833 
834  if (!L.getLoopPreheader())
835  return false;
836 
837  // Remove any subregisters from inputs to phi nodes.
838  preprocessPhiNodes(*L.getHeader());
839  return true;
840 }
841 
842 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
843  MachineRegisterInfo &MRI = MF->getRegInfo();
844  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
845 
846  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
847  MachineOperand &DefOp = PI.getOperand(0);
848  assert(DefOp.getSubReg() == 0);
849  auto *RC = MRI.getRegClass(DefOp.getReg());
850 
851  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
852  MachineOperand &RegOp = PI.getOperand(i);
853  if (RegOp.getSubReg() == 0)
854  continue;
855 
856  // If the operand uses a subregister, replace it with a new register
857  // without subregisters, and generate a copy to the new register.
858  unsigned NewReg = MRI.createVirtualRegister(RC);
859  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
861  const DebugLoc &DL = PredB.findDebugLoc(At);
862  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
863  .addReg(RegOp.getReg(), getRegState(RegOp),
864  RegOp.getSubReg());
865  Slots.insertMachineInstrInMaps(*Copy);
866  RegOp.setReg(NewReg);
867  RegOp.setSubReg(0);
868  }
869  }
870 }
871 
872 /// The SMS algorithm consists of the following main steps:
873 /// 1. Computation and analysis of the dependence graph.
874 /// 2. Ordering of the nodes (instructions).
875 /// 3. Attempt to Schedule the loop.
876 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
877  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
878 
879  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
880 
881  MachineBasicBlock *MBB = L.getHeader();
882  // The kernel should not include any terminator instructions. These
883  // will be added back later.
884  SMS.startBlock(MBB);
885 
886  // Compute the number of 'real' instructions in the basic block by
887  // ignoring terminators.
888  unsigned size = MBB->size();
890  E = MBB->instr_end();
891  I != E; ++I, --size)
892  ;
893 
894  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
895  SMS.schedule();
896  SMS.exitRegion();
897 
898  SMS.finishBlock();
899  return SMS.hasNewSchedule();
900 }
901 
902 /// We override the schedule function in ScheduleDAGInstrs to implement the
903 /// scheduling part of the Swing Modulo Scheduling algorithm.
904 void SwingSchedulerDAG::schedule() {
905  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
906  buildSchedGraph(AA);
907  addLoopCarriedDependences(AA);
908  updatePhiDependences();
909  Topo.InitDAGTopologicalSorting();
910  changeDependences();
911  postprocessDAG();
912  LLVM_DEBUG(dump());
913 
914  NodeSetType NodeSets;
915  findCircuits(NodeSets);
916  NodeSetType Circuits = NodeSets;
917 
918  // Calculate the MII.
919  unsigned ResMII = calculateResMII();
920  unsigned RecMII = calculateRecMII(NodeSets);
921 
922  fuseRecs(NodeSets);
923 
924  // This flag is used for testing and can cause correctness problems.
925  if (SwpIgnoreRecMII)
926  RecMII = 0;
927 
928  MII = std::max(ResMII, RecMII);
929  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
930  << ", res=" << ResMII << ")\n");
931 
932  // Can't schedule a loop without a valid MII.
933  if (MII == 0)
934  return;
935 
936  // Don't pipeline large loops.
937  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
938  return;
939 
940  computeNodeFunctions(NodeSets);
941 
942  registerPressureFilter(NodeSets);
943 
944  colocateNodeSets(NodeSets);
945 
946  checkNodeSets(NodeSets);
947 
948  LLVM_DEBUG({
949  for (auto &I : NodeSets) {
950  dbgs() << " Rec NodeSet ";
951  I.dump();
952  }
953  });
954 
955  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
956 
957  groupRemainingNodes(NodeSets);
958 
959  removeDuplicateNodes(NodeSets);
960 
961  LLVM_DEBUG({
962  for (auto &I : NodeSets) {
963  dbgs() << " NodeSet ";
964  I.dump();
965  }
966  });
967 
968  computeNodeOrder(NodeSets);
969 
970  // check for node order issues
971  checkValidNodeOrder(Circuits);
972 
973  SMSchedule Schedule(Pass.MF);
974  Scheduled = schedulePipeline(Schedule);
975 
976  if (!Scheduled)
977  return;
978 
979  unsigned numStages = Schedule.getMaxStageCount();
980  // No need to generate pipeline if there are no overlapped iterations.
981  if (numStages == 0)
982  return;
983 
984  // Check that the maximum stage count is less than user-defined limit.
985  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
986  return;
987 
988  generatePipelinedLoop(Schedule);
989  ++NumPipelined;
990 }
991 
992 /// Clean up after the software pipeliner runs.
993 void SwingSchedulerDAG::finishBlock() {
994  for (MachineInstr *I : NewMIs)
995  MF.DeleteMachineInstr(I);
996  NewMIs.clear();
997 
998  // Call the superclass.
1000 }
1001 
1002 /// Return the register values for the operands of a Phi instruction.
1003 /// This function assume the instruction is a Phi.
1005  unsigned &InitVal, unsigned &LoopVal) {
1006  assert(Phi.isPHI() && "Expecting a Phi.");
1007 
1008  InitVal = 0;
1009  LoopVal = 0;
1010  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1011  if (Phi.getOperand(i + 1).getMBB() != Loop)
1012  InitVal = Phi.getOperand(i).getReg();
1013  else
1014  LoopVal = Phi.getOperand(i).getReg();
1015 
1016  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
1017 }
1018 
1019 /// Return the Phi register value that comes from the incoming block.
1020 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1021  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1022  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
1023  return Phi.getOperand(i).getReg();
1024  return 0;
1025 }
1026 
1027 /// Return the Phi register value that comes the loop block.
1028 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
1029  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
1030  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
1031  return Phi.getOperand(i).getReg();
1032  return 0;
1033 }
1034 
1035 /// Return true if SUb can be reached from SUa following the chain edges.
1036 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
1037  SmallPtrSet<SUnit *, 8> Visited;
1038  SmallVector<SUnit *, 8> Worklist;
1039  Worklist.push_back(SUa);
1040  while (!Worklist.empty()) {
1041  const SUnit *SU = Worklist.pop_back_val();
1042  for (auto &SI : SU->Succs) {
1043  SUnit *SuccSU = SI.getSUnit();
1044  if (SI.getKind() == SDep::Order) {
1045  if (Visited.count(SuccSU))
1046  continue;
1047  if (SuccSU == SUb)
1048  return true;
1049  Worklist.push_back(SuccSU);
1050  Visited.insert(SuccSU);
1051  }
1052  }
1053  }
1054  return false;
1055 }
1056 
1057 /// Return true if the instruction causes a chain between memory
1058 /// references before and after it.
1060  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
1061  (MI.hasOrderedMemoryRef() &&
1062  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
1063 }
1064 
1065 /// Return the underlying objects for the memory references of an instruction.
1066 /// This function calls the code in ValueTracking, but first checks that the
1067 /// instruction has a memory operand.
1070  const DataLayout &DL) {
1071  if (!MI->hasOneMemOperand())
1072  return;
1073  MachineMemOperand *MM = *MI->memoperands_begin();
1074  if (!MM->getValue())
1075  return;
1076  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
1077  for (Value *V : Objs) {
1078  if (!isIdentifiedObject(V)) {
1079  Objs.clear();
1080  return;
1081  }
1082  Objs.push_back(V);
1083  }
1084 }
1085 
1086 /// Add a chain edge between a load and store if the store can be an
1087 /// alias of the load on a subsequent iteration, i.e., a loop carried
1088 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
1089 /// but that code doesn't create loop carried dependences.
1090 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
1092  Value *UnknownValue =
1094  for (auto &SU : SUnits) {
1095  MachineInstr &MI = *SU.getInstr();
1096  if (isDependenceBarrier(MI, AA))
1097  PendingLoads.clear();
1098  else if (MI.mayLoad()) {
1100  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1101  if (Objs.empty())
1102  Objs.push_back(UnknownValue);
1103  for (auto V : Objs) {
1104  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
1105  SUs.push_back(&SU);
1106  }
1107  } else if (MI.mayStore()) {
1109  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
1110  if (Objs.empty())
1111  Objs.push_back(UnknownValue);
1112  for (auto V : Objs) {
1114  PendingLoads.find(V);
1115  if (I == PendingLoads.end())
1116  continue;
1117  for (auto Load : I->second) {
1118  if (isSuccOrder(Load, &SU))
1119  continue;
1120  MachineInstr &LdMI = *Load->getInstr();
1121  // First, perform the cheaper check that compares the base register.
1122  // If they are the same and the load offset is less than the store
1123  // offset, then mark the dependence as loop carried potentially.
1124  unsigned BaseReg1, BaseReg2;
1125  int64_t Offset1, Offset2;
1126  if (TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) &&
1127  TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
1128  if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
1129  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
1130  "What happened to the chain edge?");
1131  SDep Dep(Load, SDep::Barrier);
1132  Dep.setLatency(1);
1133  SU.addPred(Dep);
1134  continue;
1135  }
1136  }
1137  // Second, the more expensive check that uses alias analysis on the
1138  // base registers. If they alias, and the load offset is less than
1139  // the store offset, the mark the dependence as loop carried.
1140  if (!AA) {
1141  SDep Dep(Load, SDep::Barrier);
1142  Dep.setLatency(1);
1143  SU.addPred(Dep);
1144  continue;
1145  }
1146  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
1147  MachineMemOperand *MMO2 = *MI.memoperands_begin();
1148  if (!MMO1->getValue() || !MMO2->getValue()) {
1149  SDep Dep(Load, SDep::Barrier);
1150  Dep.setLatency(1);
1151  SU.addPred(Dep);
1152  continue;
1153  }
1154  if (MMO1->getValue() == MMO2->getValue() &&
1155  MMO1->getOffset() <= MMO2->getOffset()) {
1156  SDep Dep(Load, SDep::Barrier);
1157  Dep.setLatency(1);
1158  SU.addPred(Dep);
1159  continue;
1160  }
1161  AliasResult AAResult = AA->alias(
1163  MMO1->getAAInfo()),
1165  MMO2->getAAInfo()));
1166 
1167  if (AAResult != NoAlias) {
1168  SDep Dep(Load, SDep::Barrier);
1169  Dep.setLatency(1);
1170  SU.addPred(Dep);
1171  }
1172  }
1173  }
1174  }
1175  }
1176 }
1177 
1178 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
1179 /// processes dependences for PHIs. This function adds true dependences
1180 /// from a PHI to a use, and a loop carried dependence from the use to the
1181 /// PHI. The loop carried dependence is represented as an anti dependence
1182 /// edge. This function also removes chain dependences between unrelated
1183 /// PHIs.
1184 void SwingSchedulerDAG::updatePhiDependences() {
1185  SmallVector<SDep, 4> RemoveDeps;
1187 
1188  // Iterate over each DAG node.
1189  for (SUnit &I : SUnits) {
1190  RemoveDeps.clear();
1191  // Set to true if the instruction has an operand defined by a Phi.
1192  unsigned HasPhiUse = 0;
1193  unsigned HasPhiDef = 0;
1194  MachineInstr *MI = I.getInstr();
1195  // Iterate over each operand, and we process the definitions.
1196  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
1197  MOE = MI->operands_end();
1198  MOI != MOE; ++MOI) {
1199  if (!MOI->isReg())
1200  continue;
1201  unsigned Reg = MOI->getReg();
1202  if (MOI->isDef()) {
1203  // If the register is used by a Phi, then create an anti dependence.
1205  UI = MRI.use_instr_begin(Reg),
1206  UE = MRI.use_instr_end();
1207  UI != UE; ++UI) {
1208  MachineInstr *UseMI = &*UI;
1209  SUnit *SU = getSUnit(UseMI);
1210  if (SU != nullptr && UseMI->isPHI()) {
1211  if (!MI->isPHI()) {
1212  SDep Dep(SU, SDep::Anti, Reg);
1213  Dep.setLatency(1);
1214  I.addPred(Dep);
1215  } else {
1216  HasPhiDef = Reg;
1217  // Add a chain edge to a dependent Phi that isn't an existing
1218  // predecessor.
1219  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1220  I.addPred(SDep(SU, SDep::Barrier));
1221  }
1222  }
1223  }
1224  } else if (MOI->isUse()) {
1225  // If the register is defined by a Phi, then create a true dependence.
1226  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
1227  if (DefMI == nullptr)
1228  continue;
1229  SUnit *SU = getSUnit(DefMI);
1230  if (SU != nullptr && DefMI->isPHI()) {
1231  if (!MI->isPHI()) {
1232  SDep Dep(SU, SDep::Data, Reg);
1233  Dep.setLatency(0);
1234  ST.adjustSchedDependency(SU, &I, Dep);
1235  I.addPred(Dep);
1236  } else {
1237  HasPhiUse = Reg;
1238  // Add a chain edge to a dependent Phi that isn't an existing
1239  // predecessor.
1240  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
1241  I.addPred(SDep(SU, SDep::Barrier));
1242  }
1243  }
1244  }
1245  }
1246  // Remove order dependences from an unrelated Phi.
1247  if (!SwpPruneDeps)
1248  continue;
1249  for (auto &PI : I.Preds) {
1250  MachineInstr *PMI = PI.getSUnit()->getInstr();
1251  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
1252  if (I.getInstr()->isPHI()) {
1253  if (PMI->getOperand(0).getReg() == HasPhiUse)
1254  continue;
1255  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
1256  continue;
1257  }
1258  RemoveDeps.push_back(PI);
1259  }
1260  }
1261  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
1262  I.removePred(RemoveDeps[i]);
1263  }
1264 }
1265 
1266 /// Iterate over each DAG node and see if we can change any dependences
1267 /// in order to reduce the recurrence MII.
1268 void SwingSchedulerDAG::changeDependences() {
1269  // See if an instruction can use a value from the previous iteration.
1270  // If so, we update the base and offset of the instruction and change
1271  // the dependences.
1272  for (SUnit &I : SUnits) {
1273  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
1274  int64_t NewOffset = 0;
1275  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
1276  NewOffset))
1277  continue;
1278 
1279  // Get the MI and SUnit for the instruction that defines the original base.
1280  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
1281  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
1282  if (!DefMI)
1283  continue;
1284  SUnit *DefSU = getSUnit(DefMI);
1285  if (!DefSU)
1286  continue;
1287  // Get the MI and SUnit for the instruction that defins the new base.
1288  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
1289  if (!LastMI)
1290  continue;
1291  SUnit *LastSU = getSUnit(LastMI);
1292  if (!LastSU)
1293  continue;
1294 
1295  if (Topo.IsReachable(&I, LastSU))
1296  continue;
1297 
1298  // Remove the dependence. The value now depends on a prior iteration.
1299  SmallVector<SDep, 4> Deps;
1300  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
1301  ++P)
1302  if (P->getSUnit() == DefSU)
1303  Deps.push_back(*P);
1304  for (int i = 0, e = Deps.size(); i != e; i++) {
1305  Topo.RemovePred(&I, Deps[i].getSUnit());
1306  I.removePred(Deps[i]);
1307  }
1308  // Remove the chain dependence between the instructions.
1309  Deps.clear();
1310  for (auto &P : LastSU->Preds)
1311  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
1312  Deps.push_back(P);
1313  for (int i = 0, e = Deps.size(); i != e; i++) {
1314  Topo.RemovePred(LastSU, Deps[i].getSUnit());
1315  LastSU->removePred(Deps[i]);
1316  }
1317 
1318  // Add a dependence between the new instruction and the instruction
1319  // that defines the new base.
1320  SDep Dep(&I, SDep::Anti, NewBase);
1321  Topo.AddPred(LastSU, &I);
1322  LastSU->addPred(Dep);
1323 
1324  // Remember the base and offset information so that we can update the
1325  // instruction during code generation.
1326  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
1327  }
1328 }
1329 
1330 namespace {
1331 
1332 // FuncUnitSorter - Comparison operator used to sort instructions by
1333 // the number of functional unit choices.
1334 struct FuncUnitSorter {
1335  const InstrItineraryData *InstrItins;
1336  DenseMap<unsigned, unsigned> Resources;
1337 
1338  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
1339 
1340  // Compute the number of functional unit alternatives needed
1341  // at each stage, and take the minimum value. We prioritize the
1342  // instructions by the least number of choices first.
1343  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
1344  unsigned schedClass = Inst->getDesc().getSchedClass();
1345  unsigned min = UINT_MAX;
1346  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
1347  *IE = InstrItins->endStage(schedClass);
1348  IS != IE; ++IS) {
1349  unsigned funcUnits = IS->getUnits();
1350  unsigned numAlternatives = countPopulation(funcUnits);
1351  if (numAlternatives < min) {
1352  min = numAlternatives;
1353  F = funcUnits;
1354  }
1355  }
1356  return min;
1357  }
1358 
1359  // Compute the critical resources needed by the instruction. This
1360  // function records the functional units needed by instructions that
1361  // must use only one functional unit. We use this as a tie breaker
1362  // for computing the resource MII. The instrutions that require
1363  // the same, highly used, functional unit have high priority.
1364  void calcCriticalResources(MachineInstr &MI) {
1365  unsigned SchedClass = MI.getDesc().getSchedClass();
1366  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
1367  *IE = InstrItins->endStage(SchedClass);
1368  IS != IE; ++IS) {
1369  unsigned FuncUnits = IS->getUnits();
1370  if (countPopulation(FuncUnits) == 1)
1371  Resources[FuncUnits]++;
1372  }
1373  }
1374 
1375  /// Return true if IS1 has less priority than IS2.
1376  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1377  unsigned F1 = 0, F2 = 0;
1378  unsigned MFUs1 = minFuncUnits(IS1, F1);
1379  unsigned MFUs2 = minFuncUnits(IS2, F2);
1380  if (MFUs1 == 1 && MFUs2 == 1)
1381  return Resources.lookup(F1) < Resources.lookup(F2);
1382  return MFUs1 > MFUs2;
1383  }
1384 };
1385 
1386 } // end anonymous namespace
1387 
1388 /// Calculate the resource constrained minimum initiation interval for the
1389 /// specified loop. We use the DFA to model the resources needed for
1390 /// each instruction, and we ignore dependences. A different DFA is created
1391 /// for each cycle that is required. When adding a new instruction, we attempt
1392 /// to add it to each existing DFA, until a legal space is found. If the
1393 /// instruction cannot be reserved in an existing DFA, we create a new one.
1394 unsigned SwingSchedulerDAG::calculateResMII() {
1396  MachineBasicBlock *MBB = Loop.getHeader();
1397  Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
1398 
1399  // Sort the instructions by the number of available choices for scheduling,
1400  // least to most. Use the number of critical resources as the tie breaker.
1401  FuncUnitSorter FUS =
1402  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
1404  E = MBB->getFirstTerminator();
1405  I != E; ++I)
1406  FUS.calcCriticalResources(*I);
1408  FuncUnitOrder(FUS);
1409 
1411  E = MBB->getFirstTerminator();
1412  I != E; ++I)
1413  FuncUnitOrder.push(&*I);
1414 
1415  while (!FuncUnitOrder.empty()) {
1416  MachineInstr *MI = FuncUnitOrder.top();
1417  FuncUnitOrder.pop();
1418  if (TII->isZeroCost(MI->getOpcode()))
1419  continue;
1420  // Attempt to reserve the instruction in an existing DFA. At least one
1421  // DFA is needed for each cycle.
1422  unsigned NumCycles = getSUnit(MI)->Latency;
1423  unsigned ReservedCycles = 0;
1426  for (unsigned C = 0; C < NumCycles; ++C)
1427  while (RI != RE) {
1428  if ((*RI++)->canReserveResources(*MI)) {
1429  ++ReservedCycles;
1430  break;
1431  }
1432  }
1433  // Start reserving resources using existing DFAs.
1434  for (unsigned C = 0; C < ReservedCycles; ++C) {
1435  --RI;
1436  (*RI)->reserveResources(*MI);
1437  }
1438  // Add new DFAs, if needed, to reserve resources.
1439  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1440  DFAPacketizer *NewResource =
1442  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1443  NewResource->reserveResources(*MI);
1444  Resources.push_back(NewResource);
1445  }
1446  }
1447  int Resmii = Resources.size();
1448  // Delete the memory for each of the DFAs that were created earlier.
1449  for (DFAPacketizer *RI : Resources) {
1450  DFAPacketizer *D = RI;
1451  delete D;
1452  }
1453  Resources.clear();
1454  return Resmii;
1455 }
1456 
1457 /// Calculate the recurrence-constrainted minimum initiation interval.
1458 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1459 /// for each circuit. The II needs to satisfy the inequality
1460 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1461 /// II that satisfies the inequality, and the RecMII is the maximum
1462 /// of those values.
1463 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1464  unsigned RecMII = 0;
1465 
1466  for (NodeSet &Nodes : NodeSets) {
1467  if (Nodes.empty())
1468  continue;
1469 
1470  unsigned Delay = Nodes.getLatency();
1471  unsigned Distance = 1;
1472 
1473  // ii = ceil(delay / distance)
1474  unsigned CurMII = (Delay + Distance - 1) / Distance;
1475  Nodes.setRecMII(CurMII);
1476  if (CurMII > RecMII)
1477  RecMII = CurMII;
1478  }
1479 
1480  return RecMII;
1481 }
1482 
1483 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1484 /// but we do this to find the circuits, and then change them back.
1485 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1487  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1488  SUnit *SU = &SUnits[i];
1489  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1490  IP != EP; ++IP) {
1491  if (IP->getKind() != SDep::Anti)
1492  continue;
1493  DepsAdded.push_back(std::make_pair(SU, *IP));
1494  }
1495  }
1496  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1497  E = DepsAdded.end();
1498  I != E; ++I) {
1499  // Remove this anti dependency and add one in the reverse direction.
1500  SUnit *SU = I->first;
1501  SDep &D = I->second;
1502  SUnit *TargetSU = D.getSUnit();
1503  unsigned Reg = D.getReg();
1504  unsigned Lat = D.getLatency();
1505  SU->removePred(D);
1506  SDep Dep(SU, SDep::Anti, Reg);
1507  Dep.setLatency(Lat);
1508  TargetSU->addPred(Dep);
1509  }
1510 }
1511 
1512 /// Create the adjacency structure of the nodes in the graph.
1513 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1514  SwingSchedulerDAG *DAG) {
1515  BitVector Added(SUnits.size());
1516  DenseMap<int, int> OutputDeps;
1517  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1518  Added.reset();
1519  // Add any successor to the adjacency matrix and exclude duplicates.
1520  for (auto &SI : SUnits[i].Succs) {
1521  // Only create a back-edge on the first and last nodes of a dependence
1522  // chain. This records any chains and adds them later.
1523  if (SI.getKind() == SDep::Output) {
1524  int N = SI.getSUnit()->NodeNum;
1525  int BackEdge = i;
1526  auto Dep = OutputDeps.find(BackEdge);
1527  if (Dep != OutputDeps.end()) {
1528  BackEdge = Dep->second;
1529  OutputDeps.erase(Dep);
1530  }
1531  OutputDeps[N] = BackEdge;
1532  }
1533  // Do not process a boundary node, an artificial node.
1534  // A back-edge is processed only if it goes to a Phi.
1535  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1536  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1537  continue;
1538  int N = SI.getSUnit()->NodeNum;
1539  if (!Added.test(N)) {
1540  AdjK[i].push_back(N);
1541  Added.set(N);
1542  }
1543  }
1544  // A chain edge between a store and a load is treated as a back-edge in the
1545  // adjacency matrix.
1546  for (auto &PI : SUnits[i].Preds) {
1547  if (!SUnits[i].getInstr()->mayStore() ||
1548  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1549  continue;
1550  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1551  int N = PI.getSUnit()->NodeNum;
1552  if (!Added.test(N)) {
1553  AdjK[i].push_back(N);
1554  Added.set(N);
1555  }
1556  }
1557  }
1558  }
1559  // Add back-eges in the adjacency matrix for the output dependences.
1560  for (auto &OD : OutputDeps)
1561  if (!Added.test(OD.second)) {
1562  AdjK[OD.first].push_back(OD.second);
1563  Added.set(OD.second);
1564  }
1565 }
1566 
1567 /// Identify an elementary circuit in the dependence graph starting at the
1568 /// specified node.
1569 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1570  bool HasBackedge) {
1571  SUnit *SV = &SUnits[V];
1572  bool F = false;
1573  Stack.insert(SV);
1574  Blocked.set(V);
1575 
1576  for (auto W : AdjK[V]) {
1577  if (NumPaths > MaxPaths)
1578  break;
1579  if (W < S)
1580  continue;
1581  if (W == S) {
1582  if (!HasBackedge)
1583  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1584  F = true;
1585  ++NumPaths;
1586  break;
1587  } else if (!Blocked.test(W)) {
1588  if (circuit(W, S, NodeSets,
1589  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1590  F = true;
1591  }
1592  }
1593 
1594  if (F)
1595  unblock(V);
1596  else {
1597  for (auto W : AdjK[V]) {
1598  if (W < S)
1599  continue;
1600  if (B[W].count(SV) == 0)
1601  B[W].insert(SV);
1602  }
1603  }
1604  Stack.pop_back();
1605  return F;
1606 }
1607 
1608 /// Unblock a node in the circuit finding algorithm.
1609 void SwingSchedulerDAG::Circuits::unblock(int U) {
1610  Blocked.reset(U);
1611  SmallPtrSet<SUnit *, 4> &BU = B[U];
1612  while (!BU.empty()) {
1614  assert(SI != BU.end() && "Invalid B set.");
1615  SUnit *W = *SI;
1616  BU.erase(W);
1617  if (Blocked.test(W->NodeNum))
1618  unblock(W->NodeNum);
1619  }
1620 }
1621 
1622 /// Identify all the elementary circuits in the dependence graph using
1623 /// Johnson's circuit algorithm.
1624 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1625  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1626  // but we do this to find the circuits, and then change them back.
1627  swapAntiDependences(SUnits);
1628 
1629  Circuits Cir(SUnits, Topo);
1630  // Create the adjacency structure.
1631  Cir.createAdjacencyStructure(this);
1632  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1633  Cir.reset();
1634  Cir.circuit(i, i, NodeSets);
1635  }
1636 
1637  // Change the dependences back so that we've created a DAG again.
1638  swapAntiDependences(SUnits);
1639 }
1640 
1641 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1642 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1643 // additional copies that are needed across iterations. An artificial dependence
1644 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1645 
1646 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1647 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1648 // PHI-------True-Dep------> USEOfPhi
1649 
1650 // The mutation creates
1651 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1652 
1653 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1654 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1655 // late to avoid additional copies across iterations. The possible scheduling
1656 // order would be
1657 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1658 
1660  for (SUnit &SU : DAG->SUnits) {
1661  // Find the COPY/REG_SEQUENCE instruction.
1662  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1663  continue;
1664 
1665  // Record the loop carried PHIs.
1666  SmallVector<SUnit *, 4> PHISUs;
1667  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1668  SmallVector<SUnit *, 4> SrcSUs;
1669 
1670  for (auto &Dep : SU.Preds) {
1671  SUnit *TmpSU = Dep.getSUnit();
1672  MachineInstr *TmpMI = TmpSU->getInstr();
1673  SDep::Kind DepKind = Dep.getKind();
1674  // Save the loop carried PHI.
1675  if (DepKind == SDep::Anti && TmpMI->isPHI())
1676  PHISUs.push_back(TmpSU);
1677  // Save the source of COPY/REG_SEQUENCE.
1678  // If the source has no pre-decessors, we will end up creating cycles.
1679  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1680  SrcSUs.push_back(TmpSU);
1681  }
1682 
1683  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1684  continue;
1685 
1686  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1687  // SUnit to the container.
1688  SmallVector<SUnit *, 8> UseSUs;
1689  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1690  for (auto &Dep : (*I)->Succs) {
1691  if (Dep.getKind() != SDep::Data)
1692  continue;
1693 
1694  SUnit *TmpSU = Dep.getSUnit();
1695  MachineInstr *TmpMI = TmpSU->getInstr();
1696  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1697  PHISUs.push_back(TmpSU);
1698  continue;
1699  }
1700  UseSUs.push_back(TmpSU);
1701  }
1702  }
1703 
1704  if (UseSUs.size() == 0)
1705  continue;
1706 
1707  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1708  // Add the artificial dependencies if it does not form a cycle.
1709  for (auto I : UseSUs) {
1710  for (auto Src : SrcSUs) {
1711  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1712  Src->addPred(SDep(I, SDep::Artificial));
1713  SDAG->Topo.AddPred(Src, I);
1714  }
1715  }
1716  }
1717  }
1718 }
1719 
1720 /// Return true for DAG nodes that we ignore when computing the cost functions.
1721 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1722 /// in the calculation of the ASAP, ALAP, etc functions.
1723 static bool ignoreDependence(const SDep &D, bool isPred) {
1724  if (D.isArtificial())
1725  return true;
1726  return D.getKind() == SDep::Anti && isPred;
1727 }
1728 
1729 /// Compute several functions need to order the nodes for scheduling.
1730 /// ASAP - Earliest time to schedule a node.
1731 /// ALAP - Latest time to schedule a node.
1732 /// MOV - Mobility function, difference between ALAP and ASAP.
1733 /// D - Depth of each node.
1734 /// H - Height of each node.
1735 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1736  ScheduleInfo.resize(SUnits.size());
1737 
1738  LLVM_DEBUG({
1739  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1740  E = Topo.end();
1741  I != E; ++I) {
1742  const SUnit &SU = SUnits[*I];
1743  dumpNode(SU);
1744  }
1745  });
1746 
1747  int maxASAP = 0;
1748  // Compute ASAP and ZeroLatencyDepth.
1749  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1750  E = Topo.end();
1751  I != E; ++I) {
1752  int asap = 0;
1753  int zeroLatencyDepth = 0;
1754  SUnit *SU = &SUnits[*I];
1755  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1756  EP = SU->Preds.end();
1757  IP != EP; ++IP) {
1758  SUnit *pred = IP->getSUnit();
1759  if (IP->getLatency() == 0)
1760  zeroLatencyDepth =
1761  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1762  if (ignoreDependence(*IP, true))
1763  continue;
1764  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1765  getDistance(pred, SU, *IP) * MII));
1766  }
1767  maxASAP = std::max(maxASAP, asap);
1768  ScheduleInfo[*I].ASAP = asap;
1769  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1770  }
1771 
1772  // Compute ALAP, ZeroLatencyHeight, and MOV.
1774  E = Topo.rend();
1775  I != E; ++I) {
1776  int alap = maxASAP;
1777  int zeroLatencyHeight = 0;
1778  SUnit *SU = &SUnits[*I];
1779  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1780  ES = SU->Succs.end();
1781  IS != ES; ++IS) {
1782  SUnit *succ = IS->getSUnit();
1783  if (IS->getLatency() == 0)
1784  zeroLatencyHeight =
1785  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1786  if (ignoreDependence(*IS, true))
1787  continue;
1788  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1789  getDistance(SU, succ, *IS) * MII));
1790  }
1791 
1792  ScheduleInfo[*I].ALAP = alap;
1793  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1794  }
1795 
1796  // After computing the node functions, compute the summary for each node set.
1797  for (NodeSet &I : NodeSets)
1798  I.computeNodeSetInfo(this);
1799 
1800  LLVM_DEBUG({
1801  for (unsigned i = 0; i < SUnits.size(); i++) {
1802  dbgs() << "\tNode " << i << ":\n";
1803  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1804  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1805  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1806  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1807  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1808  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1809  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1810  }
1811  });
1812 }
1813 
1814 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1815 /// as the predecessors of the elements of NodeOrder that are not also in
1816 /// NodeOrder.
1817 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1819  const NodeSet *S = nullptr) {
1820  Preds.clear();
1821  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1822  I != E; ++I) {
1823  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1824  PI != PE; ++PI) {
1825  if (S && S->count(PI->getSUnit()) == 0)
1826  continue;
1827  if (ignoreDependence(*PI, true))
1828  continue;
1829  if (NodeOrder.count(PI->getSUnit()) == 0)
1830  Preds.insert(PI->getSUnit());
1831  }
1832  // Back-edges are predecessors with an anti-dependence.
1833  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1834  ES = (*I)->Succs.end();
1835  IS != ES; ++IS) {
1836  if (IS->getKind() != SDep::Anti)
1837  continue;
1838  if (S && S->count(IS->getSUnit()) == 0)
1839  continue;
1840  if (NodeOrder.count(IS->getSUnit()) == 0)
1841  Preds.insert(IS->getSUnit());
1842  }
1843  }
1844  return !Preds.empty();
1845 }
1846 
1847 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1848 /// as the successors of the elements of NodeOrder that are not also in
1849 /// NodeOrder.
1850 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1852  const NodeSet *S = nullptr) {
1853  Succs.clear();
1854  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1855  I != E; ++I) {
1856  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1857  SI != SE; ++SI) {
1858  if (S && S->count(SI->getSUnit()) == 0)
1859  continue;
1860  if (ignoreDependence(*SI, false))
1861  continue;
1862  if (NodeOrder.count(SI->getSUnit()) == 0)
1863  Succs.insert(SI->getSUnit());
1864  }
1865  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1866  PE = (*I)->Preds.end();
1867  PI != PE; ++PI) {
1868  if (PI->getKind() != SDep::Anti)
1869  continue;
1870  if (S && S->count(PI->getSUnit()) == 0)
1871  continue;
1872  if (NodeOrder.count(PI->getSUnit()) == 0)
1873  Succs.insert(PI->getSUnit());
1874  }
1875  }
1876  return !Succs.empty();
1877 }
1878 
1879 /// Return true if there is a path from the specified node to any of the nodes
1880 /// in DestNodes. Keep track and return the nodes in any path.
1881 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1882  SetVector<SUnit *> &DestNodes,
1883  SetVector<SUnit *> &Exclude,
1884  SmallPtrSet<SUnit *, 8> &Visited) {
1885  if (Cur->isBoundaryNode())
1886  return false;
1887  if (Exclude.count(Cur) != 0)
1888  return false;
1889  if (DestNodes.count(Cur) != 0)
1890  return true;
1891  if (!Visited.insert(Cur).second)
1892  return Path.count(Cur) != 0;
1893  bool FoundPath = false;
1894  for (auto &SI : Cur->Succs)
1895  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1896  for (auto &PI : Cur->Preds)
1897  if (PI.getKind() == SDep::Anti)
1898  FoundPath |=
1899  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1900  if (FoundPath)
1901  Path.insert(Cur);
1902  return FoundPath;
1903 }
1904 
1905 /// Return true if Set1 is a subset of Set2.
1906 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1907  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1908  if (Set2.count(*I) == 0)
1909  return false;
1910  return true;
1911 }
1912 
1913 /// Compute the live-out registers for the instructions in a node-set.
1914 /// The live-out registers are those that are defined in the node-set,
1915 /// but not used. Except for use operands of Phis.
1917  NodeSet &NS) {
1919  MachineRegisterInfo &MRI = MF.getRegInfo();
1921  SmallSet<unsigned, 4> Uses;
1922  for (SUnit *SU : NS) {
1923  const MachineInstr *MI = SU->getInstr();
1924  if (MI->isPHI())
1925  continue;
1926  for (const MachineOperand &MO : MI->operands())
1927  if (MO.isReg() && MO.isUse()) {
1928  unsigned Reg = MO.getReg();
1930  Uses.insert(Reg);
1931  else if (MRI.isAllocatable(Reg))
1932  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1933  Uses.insert(*Units);
1934  }
1935  }
1936  for (SUnit *SU : NS)
1937  for (const MachineOperand &MO : SU->getInstr()->operands())
1938  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1939  unsigned Reg = MO.getReg();
1941  if (!Uses.count(Reg))
1942  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1944  } else if (MRI.isAllocatable(Reg)) {
1945  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1946  if (!Uses.count(*Units))
1947  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1949  }
1950  }
1951  RPTracker.addLiveRegs(LiveOutRegs);
1952 }
1953 
1954 /// A heuristic to filter nodes in recurrent node-sets if the register
1955 /// pressure of a set is too high.
1956 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1957  for (auto &NS : NodeSets) {
1958  // Skip small node-sets since they won't cause register pressure problems.
1959  if (NS.size() <= 2)
1960  continue;
1961  IntervalPressure RecRegPressure;
1962  RegPressureTracker RecRPTracker(RecRegPressure);
1963  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1964  computeLiveOuts(MF, RecRPTracker, NS);
1965  RecRPTracker.closeBottom();
1966 
1967  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1968  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1969  return A->NodeNum > B->NodeNum;
1970  });
1971 
1972  for (auto &SU : SUnits) {
1973  // Since we're computing the register pressure for a subset of the
1974  // instructions in a block, we need to set the tracker for each
1975  // instruction in the node-set. The tracker is set to the instruction
1976  // just after the one we're interested in.
1978  RecRPTracker.setPos(std::next(CurInstI));
1979 
1980  RegPressureDelta RPDelta;
1981  ArrayRef<PressureChange> CriticalPSets;
1982  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1983  CriticalPSets,
1984  RecRegPressure.MaxSetPressure);
1985  if (RPDelta.Excess.isValid()) {
1986  LLVM_DEBUG(
1987  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1988  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1989  << ":" << RPDelta.Excess.getUnitInc());
1990  NS.setExceedPressure(SU);
1991  break;
1992  }
1993  RecRPTracker.recede();
1994  }
1995  }
1996 }
1997 
1998 /// A heuristic to colocate node sets that have the same set of
1999 /// successors.
2000 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
2001  unsigned Colocate = 0;
2002  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
2003  NodeSet &N1 = NodeSets[i];
2005  if (N1.empty() || !succ_L(N1, S1))
2006  continue;
2007  for (int j = i + 1; j < e; ++j) {
2008  NodeSet &N2 = NodeSets[j];
2009  if (N1.compareRecMII(N2) != 0)
2010  continue;
2012  if (N2.empty() || !succ_L(N2, S2))
2013  continue;
2014  if (isSubset(S1, S2) && S1.size() == S2.size()) {
2015  N1.setColocate(++Colocate);
2016  N2.setColocate(Colocate);
2017  break;
2018  }
2019  }
2020  }
2021 }
2022 
2023 /// Check if the existing node-sets are profitable. If not, then ignore the
2024 /// recurrent node-sets, and attempt to schedule all nodes together. This is
2025 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
2026 /// then it's best to try to schedule all instructions together instead of
2027 /// starting with the recurrent node-sets.
2028 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
2029  // Look for loops with a large MII.
2030  if (MII < 17)
2031  return;
2032  // Check if the node-set contains only a simple add recurrence.
2033  for (auto &NS : NodeSets) {
2034  if (NS.getRecMII() > 2)
2035  return;
2036  if (NS.getMaxDepth() > MII)
2037  return;
2038  }
2039  NodeSets.clear();
2040  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
2041  return;
2042 }
2043 
2044 /// Add the nodes that do not belong to a recurrence set into groups
2045 /// based upon connected componenets.
2046 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
2047  SetVector<SUnit *> NodesAdded;
2048  SmallPtrSet<SUnit *, 8> Visited;
2049  // Add the nodes that are on a path between the previous node sets and
2050  // the current node set.
2051  for (NodeSet &I : NodeSets) {
2053  // Add the nodes from the current node set to the previous node set.
2054  if (succ_L(I, N)) {
2055  SetVector<SUnit *> Path;
2056  for (SUnit *NI : N) {
2057  Visited.clear();
2058  computePath(NI, Path, NodesAdded, I, Visited);
2059  }
2060  if (!Path.empty())
2061  I.insert(Path.begin(), Path.end());
2062  }
2063  // Add the nodes from the previous node set to the current node set.
2064  N.clear();
2065  if (succ_L(NodesAdded, N)) {
2066  SetVector<SUnit *> Path;
2067  for (SUnit *NI : N) {
2068  Visited.clear();
2069  computePath(NI, Path, I, NodesAdded, Visited);
2070  }
2071  if (!Path.empty())
2072  I.insert(Path.begin(), Path.end());
2073  }
2074  NodesAdded.insert(I.begin(), I.end());
2075  }
2076 
2077  // Create a new node set with the connected nodes of any successor of a node
2078  // in a recurrent set.
2079  NodeSet NewSet;
2081  if (succ_L(NodesAdded, N))
2082  for (SUnit *I : N)
2083  addConnectedNodes(I, NewSet, NodesAdded);
2084  if (!NewSet.empty())
2085  NodeSets.push_back(NewSet);
2086 
2087  // Create a new node set with the connected nodes of any predecessor of a node
2088  // in a recurrent set.
2089  NewSet.clear();
2090  if (pred_L(NodesAdded, N))
2091  for (SUnit *I : N)
2092  addConnectedNodes(I, NewSet, NodesAdded);
2093  if (!NewSet.empty())
2094  NodeSets.push_back(NewSet);
2095 
2096  // Create new nodes sets with the connected nodes any remaining node that
2097  // has no predecessor.
2098  for (unsigned i = 0; i < SUnits.size(); ++i) {
2099  SUnit *SU = &SUnits[i];
2100  if (NodesAdded.count(SU) == 0) {
2101  NewSet.clear();
2102  addConnectedNodes(SU, NewSet, NodesAdded);
2103  if (!NewSet.empty())
2104  NodeSets.push_back(NewSet);
2105  }
2106  }
2107 }
2108 
2109 /// Add the node to the set, and add all is its connected nodes to the set.
2110 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
2111  SetVector<SUnit *> &NodesAdded) {
2112  NewSet.insert(SU);
2113  NodesAdded.insert(SU);
2114  for (auto &SI : SU->Succs) {
2115  SUnit *Successor = SI.getSUnit();
2116  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
2117  addConnectedNodes(Successor, NewSet, NodesAdded);
2118  }
2119  for (auto &PI : SU->Preds) {
2120  SUnit *Predecessor = PI.getSUnit();
2121  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
2122  addConnectedNodes(Predecessor, NewSet, NodesAdded);
2123  }
2124 }
2125 
2126 /// Return true if Set1 contains elements in Set2. The elements in common
2127 /// are returned in a different container.
2128 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
2129  SmallSetVector<SUnit *, 8> &Result) {
2130  Result.clear();
2131  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
2132  SUnit *SU = Set1[i];
2133  if (Set2.count(SU) != 0)
2134  Result.insert(SU);
2135  }
2136  return !Result.empty();
2137 }
2138 
2139 /// Merge the recurrence node sets that have the same initial node.
2140 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
2141  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2142  ++I) {
2143  NodeSet &NI = *I;
2144  for (NodeSetType::iterator J = I + 1; J != E;) {
2145  NodeSet &NJ = *J;
2146  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
2147  if (NJ.compareRecMII(NI) > 0)
2148  NI.setRecMII(NJ.getRecMII());
2149  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
2150  ++NII)
2151  I->insert(*NII);
2152  NodeSets.erase(J);
2153  E = NodeSets.end();
2154  } else {
2155  ++J;
2156  }
2157  }
2158  }
2159 }
2160 
2161 /// Remove nodes that have been scheduled in previous NodeSets.
2162 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
2163  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
2164  ++I)
2165  for (NodeSetType::iterator J = I + 1; J != E;) {
2166  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
2167 
2168  if (J->empty()) {
2169  NodeSets.erase(J);
2170  E = NodeSets.end();
2171  } else {
2172  ++J;
2173  }
2174  }
2175 }
2176 
2177 /// Compute an ordered list of the dependence graph nodes, which
2178 /// indicates the order that the nodes will be scheduled. This is a
2179 /// two-level algorithm. First, a partial order is created, which
2180 /// consists of a list of sets ordered from highest to lowest priority.
2181 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
2183  NodeOrder.clear();
2184 
2185  for (auto &Nodes : NodeSets) {
2186  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
2187  OrderKind Order;
2189  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
2190  R.insert(N.begin(), N.end());
2191  Order = BottomUp;
2192  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
2193  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
2194  R.insert(N.begin(), N.end());
2195  Order = TopDown;
2196  LLVM_DEBUG(dbgs() << " Top down (succs) ");
2197  } else if (isIntersect(N, Nodes, R)) {
2198  // If some of the successors are in the existing node-set, then use the
2199  // top-down ordering.
2200  Order = TopDown;
2201  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
2202  } else if (NodeSets.size() == 1) {
2203  for (auto &N : Nodes)
2204  if (N->Succs.size() == 0)
2205  R.insert(N);
2206  Order = BottomUp;
2207  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
2208  } else {
2209  // Find the node with the highest ASAP.
2210  SUnit *maxASAP = nullptr;
2211  for (SUnit *SU : Nodes) {
2212  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
2213  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
2214  maxASAP = SU;
2215  }
2216  R.insert(maxASAP);
2217  Order = BottomUp;
2218  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
2219  }
2220 
2221  while (!R.empty()) {
2222  if (Order == TopDown) {
2223  // Choose the node with the maximum height. If more than one, choose
2224  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
2225  // choose the node with the lowest MOV.
2226  while (!R.empty()) {
2227  SUnit *maxHeight = nullptr;
2228  for (SUnit *I : R) {
2229  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
2230  maxHeight = I;
2231  else if (getHeight(I) == getHeight(maxHeight) &&
2232  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
2233  maxHeight = I;
2234  else if (getHeight(I) == getHeight(maxHeight) &&
2235  getZeroLatencyHeight(I) ==
2236  getZeroLatencyHeight(maxHeight) &&
2237  getMOV(I) < getMOV(maxHeight))
2238  maxHeight = I;
2239  }
2240  NodeOrder.insert(maxHeight);
2241  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
2242  R.remove(maxHeight);
2243  for (const auto &I : maxHeight->Succs) {
2244  if (Nodes.count(I.getSUnit()) == 0)
2245  continue;
2246  if (NodeOrder.count(I.getSUnit()) != 0)
2247  continue;
2248  if (ignoreDependence(I, false))
2249  continue;
2250  R.insert(I.getSUnit());
2251  }
2252  // Back-edges are predecessors with an anti-dependence.
2253  for (const auto &I : maxHeight->Preds) {
2254  if (I.getKind() != SDep::Anti)
2255  continue;
2256  if (Nodes.count(I.getSUnit()) == 0)
2257  continue;
2258  if (NodeOrder.count(I.getSUnit()) != 0)
2259  continue;
2260  R.insert(I.getSUnit());
2261  }
2262  }
2263  Order = BottomUp;
2264  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
2266  if (pred_L(NodeOrder, N, &Nodes))
2267  R.insert(N.begin(), N.end());
2268  } else {
2269  // Choose the node with the maximum depth. If more than one, choose
2270  // the node with the maximum ZeroLatencyDepth. If still more than one,
2271  // choose the node with the lowest MOV.
2272  while (!R.empty()) {
2273  SUnit *maxDepth = nullptr;
2274  for (SUnit *I : R) {
2275  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
2276  maxDepth = I;
2277  else if (getDepth(I) == getDepth(maxDepth) &&
2278  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
2279  maxDepth = I;
2280  else if (getDepth(I) == getDepth(maxDepth) &&
2281  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
2282  getMOV(I) < getMOV(maxDepth))
2283  maxDepth = I;
2284  }
2285  NodeOrder.insert(maxDepth);
2286  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
2287  R.remove(maxDepth);
2288  if (Nodes.isExceedSU(maxDepth)) {
2289  Order = TopDown;
2290  R.clear();
2291  R.insert(Nodes.getNode(0));
2292  break;
2293  }
2294  for (const auto &I : maxDepth->Preds) {
2295  if (Nodes.count(I.getSUnit()) == 0)
2296  continue;
2297  if (NodeOrder.count(I.getSUnit()) != 0)
2298  continue;
2299  R.insert(I.getSUnit());
2300  }
2301  // Back-edges are predecessors with an anti-dependence.
2302  for (const auto &I : maxDepth->Succs) {
2303  if (I.getKind() != SDep::Anti)
2304  continue;
2305  if (Nodes.count(I.getSUnit()) == 0)
2306  continue;
2307  if (NodeOrder.count(I.getSUnit()) != 0)
2308  continue;
2309  R.insert(I.getSUnit());
2310  }
2311  }
2312  Order = TopDown;
2313  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2315  if (succ_L(NodeOrder, N, &Nodes))
2316  R.insert(N.begin(), N.end());
2317  }
2318  }
2319  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2320  }
2321 
2322  LLVM_DEBUG({
2323  dbgs() << "Node order: ";
2324  for (SUnit *I : NodeOrder)
2325  dbgs() << " " << I->NodeNum << " ";
2326  dbgs() << "\n";
2327  });
2328 }
2329 
2330 /// Process the nodes in the computed order and create the pipelined schedule
2331 /// of the instructions, if possible. Return true if a schedule is found.
2332 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2333  if (NodeOrder.empty())
2334  return false;
2335 
2336  bool scheduleFound = false;
2337  // Keep increasing II until a valid schedule is found.
2338  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
2339  Schedule.reset();
2340  Schedule.setInitiationInterval(II);
2341  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2342 
2343  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2344  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2345  do {
2346  SUnit *SU = *NI;
2347 
2348  // Compute the schedule time for the instruction, which is based
2349  // upon the scheduled time for any predecessors/successors.
2350  int EarlyStart = INT_MIN;
2351  int LateStart = INT_MAX;
2352  // These values are set when the size of the schedule window is limited
2353  // due to chain dependences.
2354  int SchedEnd = INT_MAX;
2355  int SchedStart = INT_MIN;
2356  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2357  II, this);
2358  LLVM_DEBUG({
2359  dbgs() << "Inst (" << SU->NodeNum << ") ";
2360  SU->getInstr()->dump();
2361  dbgs() << "\n";
2362  });
2363  LLVM_DEBUG({
2364  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
2365  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
2366  });
2367 
2368  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2369  SchedStart > LateStart)
2370  scheduleFound = false;
2371  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2372  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2373  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2374  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2375  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2376  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2377  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2378  SchedEnd =
2379  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2380  // When scheduling a Phi it is better to start at the late cycle and go
2381  // backwards. The default order may insert the Phi too far away from
2382  // its first dependence.
2383  if (SU->getInstr()->isPHI())
2384  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2385  else
2386  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2387  } else {
2388  int FirstCycle = Schedule.getFirstCycle();
2389  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2390  FirstCycle + getASAP(SU) + II - 1, II);
2391  }
2392  // Even if we find a schedule, make sure the schedule doesn't exceed the
2393  // allowable number of stages. We keep trying if this happens.
2394  if (scheduleFound)
2395  if (SwpMaxStages > -1 &&
2396  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2397  scheduleFound = false;
2398 
2399  LLVM_DEBUG({
2400  if (!scheduleFound)
2401  dbgs() << "\tCan't schedule\n";
2402  });
2403  } while (++NI != NE && scheduleFound);
2404 
2405  // If a schedule is found, check if it is a valid schedule too.
2406  if (scheduleFound)
2407  scheduleFound = Schedule.isValidSchedule(this);
2408  }
2409 
2410  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
2411 
2412  if (scheduleFound)
2413  Schedule.finalizeSchedule(this);
2414  else
2415  Schedule.reset();
2416 
2417  return scheduleFound && Schedule.getMaxStageCount() > 0;
2418 }
2419 
2420 /// Given a schedule for the loop, generate a new version of the loop,
2421 /// and replace the old version. This function generates a prolog
2422 /// that contains the initial iterations in the pipeline, and kernel
2423 /// loop, and the epilogue that contains the code for the final
2424 /// iterations.
2425 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
2426  // Create a new basic block for the kernel and add it to the CFG.
2427  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2428 
2429  unsigned MaxStageCount = Schedule.getMaxStageCount();
2430 
2431  // Remember the registers that are used in different stages. The index is
2432  // the iteration, or stage, that the instruction is scheduled in. This is
2433  // a map between register names in the original block and the names created
2434  // in each stage of the pipelined loop.
2435  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
2436  InstrMapTy InstrMap;
2437 
2439  // Generate the prolog instructions that set up the pipeline.
2440  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
2441  MF.insert(BB->getIterator(), KernelBB);
2442 
2443  // Rearrange the instructions to generate the new, pipelined loop,
2444  // and update register names as needed.
2445  for (int Cycle = Schedule.getFirstCycle(),
2446  LastCycle = Schedule.getFinalCycle();
2447  Cycle <= LastCycle; ++Cycle) {
2448  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
2449  // This inner loop schedules each instruction in the cycle.
2450  for (SUnit *CI : CycleInstrs) {
2451  if (CI->getInstr()->isPHI())
2452  continue;
2453  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
2454  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
2455  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
2456  KernelBB->push_back(NewMI);
2457  InstrMap[NewMI] = CI->getInstr();
2458  }
2459  }
2460 
2461  // Copy any terminator instructions to the new kernel, and update
2462  // names as needed.
2463  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
2464  E = BB->instr_end();
2465  I != E; ++I) {
2466  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
2467  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
2468  KernelBB->push_back(NewMI);
2469  InstrMap[NewMI] = &*I;
2470  }
2471 
2472  KernelBB->transferSuccessors(BB);
2473  KernelBB->replaceSuccessor(BB, KernelBB);
2474 
2475  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
2476  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
2477  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
2478  InstrMap, MaxStageCount, MaxStageCount, false);
2479 
2480  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
2481 
2483  // Generate the epilog instructions to complete the pipeline.
2484  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
2485  PrologBBs);
2486 
2487  // We need this step because the register allocation doesn't handle some
2488  // situations well, so we insert copies to help out.
2489  splitLifetimes(KernelBB, EpilogBBs, Schedule);
2490 
2491  // Remove dead instructions due to loop induction variables.
2492  removeDeadInstructions(KernelBB, EpilogBBs);
2493 
2494  // Add branches between prolog and epilog blocks.
2495  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
2496 
2497  // Remove the original loop since it's no longer referenced.
2498  for (auto &I : *BB)
2500  BB->clear();
2501  BB->eraseFromParent();
2502 
2503  delete[] VRMap;
2504 }
2505 
2506 /// Generate the pipeline prolog code.
2507 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
2508  MachineBasicBlock *KernelBB,
2509  ValueMapTy *VRMap,
2510  MBBVectorTy &PrologBBs) {
2511  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
2512  assert(PreheaderBB != nullptr &&
2513  "Need to add code to handle loops w/o preheader");
2514  MachineBasicBlock *PredBB = PreheaderBB;
2515  InstrMapTy InstrMap;
2516 
2517  // Generate a basic block for each stage, not including the last stage,
2518  // which will be generated in the kernel. Each basic block may contain
2519  // instructions from multiple stages/iterations.
2520  for (unsigned i = 0; i < LastStage; ++i) {
2521  // Create and insert the prolog basic block prior to the original loop
2522  // basic block. The original loop is removed later.
2523  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2524  PrologBBs.push_back(NewBB);
2525  MF.insert(BB->getIterator(), NewBB);
2526  NewBB->transferSuccessors(PredBB);
2527  PredBB->addSuccessor(NewBB);
2528  PredBB = NewBB;
2529 
2530  // Generate instructions for each appropriate stage. Process instructions
2531  // in original program order.
2532  for (int StageNum = i; StageNum >= 0; --StageNum) {
2533  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2534  BBE = BB->getFirstTerminator();
2535  BBI != BBE; ++BBI) {
2536  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2537  if (BBI->isPHI())
2538  continue;
2539  MachineInstr *NewMI =
2540  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2541  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2542  VRMap);
2543  NewBB->push_back(NewMI);
2544  InstrMap[NewMI] = &*BBI;
2545  }
2546  }
2547  }
2548  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2549  LLVM_DEBUG({
2550  dbgs() << "prolog:\n";
2551  NewBB->dump();
2552  });
2553  }
2554 
2555  PredBB->replaceSuccessor(BB, KernelBB);
2556 
2557  // Check if we need to remove the branch from the preheader to the original
2558  // loop, and replace it with a branch to the new loop.
2559  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2560  if (numBranches) {
2562  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2563  }
2564 }
2565 
2566 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2567 /// that were started in either the prolog or the kernel. We create a basic
2568 /// block for each stage that needs to complete.
2569 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2570  MachineBasicBlock *KernelBB,
2571  ValueMapTy *VRMap,
2572  MBBVectorTy &EpilogBBs,
2573  MBBVectorTy &PrologBBs) {
2574  // We need to change the branch from the kernel to the first epilog block, so
2575  // this call to analyze branch uses the kernel rather than the original BB.
2576  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2578  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2579  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2580  if (checkBranch)
2581  return;
2582 
2583  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2584  if (*LoopExitI == KernelBB)
2585  ++LoopExitI;
2586  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2587  MachineBasicBlock *LoopExitBB = *LoopExitI;
2588 
2589  MachineBasicBlock *PredBB = KernelBB;
2590  MachineBasicBlock *EpilogStart = LoopExitBB;
2591  InstrMapTy InstrMap;
2592 
2593  // Generate a basic block for each stage, not including the last stage,
2594  // which was generated for the kernel. Each basic block may contain
2595  // instructions from multiple stages/iterations.
2596  int EpilogStage = LastStage + 1;
2597  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2599  EpilogBBs.push_back(NewBB);
2600  MF.insert(BB->getIterator(), NewBB);
2601 
2602  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2603  NewBB->addSuccessor(LoopExitBB);
2604 
2605  if (EpilogStart == LoopExitBB)
2606  EpilogStart = NewBB;
2607 
2608  // Add instructions to the epilog depending on the current block.
2609  // Process instructions in original program order.
2610  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2611  for (auto &BBI : *BB) {
2612  if (BBI.isPHI())
2613  continue;
2614  MachineInstr *In = &BBI;
2615  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2616  // Instructions with memoperands in the epilog are updated with
2617  // conservative values.
2618  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2619  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2620  NewBB->push_back(NewMI);
2621  InstrMap[NewMI] = In;
2622  }
2623  }
2624  }
2625  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2626  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2627  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2628  InstrMap, LastStage, EpilogStage, i == 1);
2629  PredBB = NewBB;
2630 
2631  LLVM_DEBUG({
2632  dbgs() << "epilog:\n";
2633  NewBB->dump();
2634  });
2635  }
2636 
2637  // Fix any Phi nodes in the loop exit block.
2638  for (MachineInstr &MI : *LoopExitBB) {
2639  if (!MI.isPHI())
2640  break;
2641  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2642  MachineOperand &MO = MI.getOperand(i);
2643  if (MO.getMBB() == BB)
2644  MO.setMBB(PredBB);
2645  }
2646  }
2647 
2648  // Create a branch to the new epilog from the kernel.
2649  // Remove the original branch and add a new branch to the epilog.
2650  TII->removeBranch(*KernelBB);
2651  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2652  // Add a branch to the loop exit.
2653  if (EpilogBBs.size() > 0) {
2654  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2656  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2657  }
2658 }
2659 
2660 /// Replace all uses of FromReg that appear outside the specified
2661 /// basic block with ToReg.
2662 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2663  MachineBasicBlock *MBB,
2664  MachineRegisterInfo &MRI,
2665  LiveIntervals &LIS) {
2666  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2667  E = MRI.use_end();
2668  I != E;) {
2669  MachineOperand &O = *I;
2670  ++I;
2671  if (O.getParent()->getParent() != MBB)
2672  O.setReg(ToReg);
2673  }
2674  if (!LIS.hasInterval(ToReg))
2675  LIS.createEmptyInterval(ToReg);
2676 }
2677 
2678 /// Return true if the register has a use that occurs outside the
2679 /// specified loop.
2680 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2681  MachineRegisterInfo &MRI) {
2683  E = MRI.use_end();
2684  I != E; ++I)
2685  if (I->getParent()->getParent() != BB)
2686  return true;
2687  return false;
2688 }
2689 
2690 /// Generate Phis for the specific block in the generated pipelined code.
2691 /// This function looks at the Phis from the original code to guide the
2692 /// creation of new Phis.
2693 void SwingSchedulerDAG::generateExistingPhis(
2695  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2696  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2697  bool IsLast) {
2698  // Compute the stage number for the initial value of the Phi, which
2699  // comes from the prolog. The prolog to use depends on to which kernel/
2700  // epilog that we're adding the Phi.
2701  unsigned PrologStage = 0;
2702  unsigned PrevStage = 0;
2703  bool InKernel = (LastStageNum == CurStageNum);
2704  if (InKernel) {
2705  PrologStage = LastStageNum - 1;
2706  PrevStage = CurStageNum;
2707  } else {
2708  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2709  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2710  }
2711 
2712  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2713  BBE = BB->getFirstNonPHI();
2714  BBI != BBE; ++BBI) {
2715  unsigned Def = BBI->getOperand(0).getReg();
2716 
2717  unsigned InitVal = 0;
2718  unsigned LoopVal = 0;
2719  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2720 
2721  unsigned PhiOp1 = 0;
2722  // The Phi value from the loop body typically is defined in the loop, but
2723  // not always. So, we need to check if the value is defined in the loop.
2724  unsigned PhiOp2 = LoopVal;
2725  if (VRMap[LastStageNum].count(LoopVal))
2726  PhiOp2 = VRMap[LastStageNum][LoopVal];
2727 
2728  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2729  int LoopValStage =
2730  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2731  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2732  if (NumStages == 0) {
2733  // We don't need to generate a Phi anymore, but we need to rename any uses
2734  // of the Phi value.
2735  unsigned NewReg = VRMap[PrevStage][LoopVal];
2736  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2737  Def, InitVal, NewReg);
2738  if (VRMap[CurStageNum].count(LoopVal))
2739  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2740  }
2741  // Adjust the number of Phis needed depending on the number of prologs left,
2742  // and the distance from where the Phi is first scheduled. The number of
2743  // Phis cannot exceed the number of prolog stages. Each stage can
2744  // potentially define two values.
2745  unsigned MaxPhis = PrologStage + 2;
2746  if (!InKernel && (int)PrologStage <= LoopValStage)
2747  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2748  unsigned NumPhis = std::min(NumStages, MaxPhis);
2749 
2750  unsigned NewReg = 0;
2751  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2752  // In the epilog, we may need to look back one stage to get the correct
2753  // Phi name because the epilog and prolog blocks execute the same stage.
2754  // The correct name is from the previous block only when the Phi has
2755  // been completely scheduled prior to the epilog, and Phi value is not
2756  // needed in multiple stages.
2757  int StageDiff = 0;
2758  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2759  NumPhis == 1)
2760  StageDiff = 1;
2761  // Adjust the computations below when the phi and the loop definition
2762  // are scheduled in different stages.
2763  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2764  StageDiff = StageScheduled - LoopValStage;
2765  for (unsigned np = 0; np < NumPhis; ++np) {
2766  // If the Phi hasn't been scheduled, then use the initial Phi operand
2767  // value. Otherwise, use the scheduled version of the instruction. This
2768  // is a little complicated when a Phi references another Phi.
2769  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2770  PhiOp1 = InitVal;
2771  // Check if the Phi has already been scheduled in a prolog stage.
2772  else if (PrologStage >= AccessStage + StageDiff + np &&
2773  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2774  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2775  // Check if the Phi has already been scheduled, but the loop intruction
2776  // is either another Phi, or doesn't occur in the loop.
2777  else if (PrologStage >= AccessStage + StageDiff + np) {
2778  // If the Phi references another Phi, we need to examine the other
2779  // Phi to get the correct value.
2780  PhiOp1 = LoopVal;
2781  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2782  int Indirects = 1;
2783  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2784  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2785  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2786  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2787  else
2788  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2789  InstOp1 = MRI.getVRegDef(PhiOp1);
2790  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2791  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2792  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2793  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2794  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2795  break;
2796  }
2797  ++Indirects;
2798  }
2799  } else
2800  PhiOp1 = InitVal;
2801  // If this references a generated Phi in the kernel, get the Phi operand
2802  // from the incoming block.
2803  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2804  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2805  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2806 
2807  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2808  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2809  // In the epilog, a map lookup is needed to get the value from the kernel,
2810  // or previous epilog block. How is does this depends on if the
2811  // instruction is scheduled in the previous block.
2812  if (!InKernel) {
2813  int StageDiffAdj = 0;
2814  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2815  StageDiffAdj = StageScheduled - LoopValStage;
2816  // Use the loop value defined in the kernel, unless the kernel
2817  // contains the last definition of the Phi.
2818  if (np == 0 && PrevStage == LastStageNum &&
2819  (StageScheduled != 0 || LoopValStage != 0) &&
2820  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2821  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2822  // Use the value defined by the Phi. We add one because we switch
2823  // from looking at the loop value to the Phi definition.
2824  else if (np > 0 && PrevStage == LastStageNum &&
2825  VRMap[PrevStage - np + 1].count(Def))
2826  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2827  // Use the loop value defined in the kernel.
2828  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2829  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2830  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2831  // Use the value defined by the Phi, unless we're generating the first
2832  // epilog and the Phi refers to a Phi in a different stage.
2833  else if (VRMap[PrevStage - np].count(Def) &&
2834  (!LoopDefIsPhi || PrevStage != LastStageNum))
2835  PhiOp2 = VRMap[PrevStage - np][Def];
2836  }
2837 
2838  // Check if we can reuse an existing Phi. This occurs when a Phi
2839  // references another Phi, and the other Phi is scheduled in an
2840  // earlier stage. We can try to reuse an existing Phi up until the last
2841  // stage of the current Phi.
2842  if (LoopDefIsPhi) {
2843  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2844  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2845  int StageDiff = (StageScheduled - LoopValStage);
2846  LVNumStages -= StageDiff;
2847  // Make sure the loop value Phi has been processed already.
2848  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2849  NewReg = PhiOp2;
2850  unsigned ReuseStage = CurStageNum;
2851  if (Schedule.isLoopCarried(this, *PhiInst))
2852  ReuseStage -= LVNumStages;
2853  // Check if the Phi to reuse has been generated yet. If not, then
2854  // there is nothing to reuse.
2855  if (VRMap[ReuseStage - np].count(LoopVal)) {
2856  NewReg = VRMap[ReuseStage - np][LoopVal];
2857 
2858  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2859  &*BBI, Def, NewReg);
2860  // Update the map with the new Phi name.
2861  VRMap[CurStageNum - np][Def] = NewReg;
2862  PhiOp2 = NewReg;
2863  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2864  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2865 
2866  if (IsLast && np == NumPhis - 1)
2867  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2868  continue;
2869  }
2870  }
2871  }
2872  if (InKernel && StageDiff > 0 &&
2873  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2874  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2875  }
2876 
2877  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2878  NewReg = MRI.createVirtualRegister(RC);
2879 
2880  MachineInstrBuilder NewPhi =
2881  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2882  TII->get(TargetOpcode::PHI), NewReg);
2883  NewPhi.addReg(PhiOp1).addMBB(BB1);
2884  NewPhi.addReg(PhiOp2).addMBB(BB2);
2885  if (np == 0)
2886  InstrMap[NewPhi] = &*BBI;
2887 
2888  // We define the Phis after creating the new pipelined code, so
2889  // we need to rename the Phi values in scheduled instructions.
2890 
2891  unsigned PrevReg = 0;
2892  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2893  PrevReg = VRMap[PrevStage - np][LoopVal];
2894  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2895  Def, NewReg, PrevReg);
2896  // If the Phi has been scheduled, use the new name for rewriting.
2897  if (VRMap[CurStageNum - np].count(Def)) {
2898  unsigned R = VRMap[CurStageNum - np][Def];
2899  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2900  R, NewReg);
2901  }
2902 
2903  // Check if we need to rename any uses that occurs after the loop. The
2904  // register to replace depends on whether the Phi is scheduled in the
2905  // epilog.
2906  if (IsLast && np == NumPhis - 1)
2907  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2908 
2909  // In the kernel, a dependent Phi uses the value from this Phi.
2910  if (InKernel)
2911  PhiOp2 = NewReg;
2912 
2913  // Update the map with the new Phi name.
2914  VRMap[CurStageNum - np][Def] = NewReg;
2915  }
2916 
2917  while (NumPhis++ < NumStages) {
2918  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2919  &*BBI, Def, NewReg, 0);
2920  }
2921 
2922  // Check if we need to rename a Phi that has been eliminated due to
2923  // scheduling.
2924  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2925  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2926  }
2927 }
2928 
2929 /// Generate Phis for the specified block in the generated pipelined code.
2930 /// These are new Phis needed because the definition is scheduled after the
2931 /// use in the pipelined sequence.
2932 void SwingSchedulerDAG::generatePhis(
2934  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2935  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2936  bool IsLast) {
2937  // Compute the stage number that contains the initial Phi value, and
2938  // the Phi from the previous stage.
2939  unsigned PrologStage = 0;
2940  unsigned PrevStage = 0;
2941  unsigned StageDiff = CurStageNum - LastStageNum;
2942  bool InKernel = (StageDiff == 0);
2943  if (InKernel) {
2944  PrologStage = LastStageNum - 1;
2945  PrevStage = CurStageNum;
2946  } else {
2947  PrologStage = LastStageNum - StageDiff;
2948  PrevStage = LastStageNum + StageDiff - 1;
2949  }
2950 
2951  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2952  BBE = BB->instr_end();
2953  BBI != BBE; ++BBI) {
2954  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2955  MachineOperand &MO = BBI->getOperand(i);
2956  if (!MO.isReg() || !MO.isDef() ||
2958  continue;
2959 
2960  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2961  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2962  unsigned Def = MO.getReg();
2963  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2964  // An instruction scheduled in stage 0 and is used after the loop
2965  // requires a phi in the epilog for the last definition from either
2966  // the kernel or prolog.
2967  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2968  hasUseAfterLoop(Def, BB, MRI))
2969  NumPhis = 1;
2970  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2971  continue;
2972 
2973  unsigned PhiOp2 = VRMap[PrevStage][Def];
2974  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2975  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2976  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2977  // The number of Phis can't exceed the number of prolog stages. The
2978  // prolog stage number is zero based.
2979  if (NumPhis > PrologStage + 1 - StageScheduled)
2980  NumPhis = PrologStage + 1 - StageScheduled;
2981  for (unsigned np = 0; np < NumPhis; ++np) {
2982  unsigned PhiOp1 = VRMap[PrologStage][Def];
2983  if (np <= PrologStage)
2984  PhiOp1 = VRMap[PrologStage - np][Def];
2985  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2986  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2987  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2988  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2989  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2990  }
2991  if (!InKernel)
2992  PhiOp2 = VRMap[PrevStage - np][Def];
2993 
2994  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2995  unsigned NewReg = MRI.createVirtualRegister(RC);
2996 
2997  MachineInstrBuilder NewPhi =
2998  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2999  TII->get(TargetOpcode::PHI), NewReg);
3000  NewPhi.addReg(PhiOp1).addMBB(BB1);
3001  NewPhi.addReg(PhiOp2).addMBB(BB2);
3002  if (np == 0)
3003  InstrMap[NewPhi] = &*BBI;
3004 
3005  // Rewrite uses and update the map. The actions depend upon whether
3006  // we generating code for the kernel or epilog blocks.
3007  if (InKernel) {
3008  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3009  &*BBI, PhiOp1, NewReg);
3010  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3011  &*BBI, PhiOp2, NewReg);
3012 
3013  PhiOp2 = NewReg;
3014  VRMap[PrevStage - np - 1][Def] = NewReg;
3015  } else {
3016  VRMap[CurStageNum - np][Def] = NewReg;
3017  if (np == NumPhis - 1)
3018  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
3019  &*BBI, Def, NewReg);
3020  }
3021  if (IsLast && np == NumPhis - 1)
3022  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
3023  }
3024  }
3025  }
3026 }
3027 
3028 /// Remove instructions that generate values with no uses.
3029 /// Typically, these are induction variable operations that generate values
3030 /// used in the loop itself. A dead instruction has a definition with
3031 /// no uses, or uses that occur in the original loop only.
3032 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
3033  MBBVectorTy &EpilogBBs) {
3034  // For each epilog block, check that the value defined by each instruction
3035  // is used. If not, delete it.
3036  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
3037  MBE = EpilogBBs.rend();
3038  MBB != MBE; ++MBB)
3039  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
3040  ME = (*MBB)->instr_rend();
3041  MI != ME;) {
3042  // From DeadMachineInstructionElem. Don't delete inline assembly.
3043  if (MI->isInlineAsm()) {
3044  ++MI;
3045  continue;
3046  }
3047  bool SawStore = false;
3048  // Check if it's safe to remove the instruction due to side effects.
3049  // We can, and want to, remove Phis here.
3050  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
3051  ++MI;
3052  continue;
3053  }
3054  bool used = true;
3055  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
3056  MOE = MI->operands_end();
3057  MOI != MOE; ++MOI) {
3058  if (!MOI->isReg() || !MOI->isDef())
3059  continue;
3060  unsigned reg = MOI->getReg();
3061  // Assume physical registers are used, unless they are marked dead.
3063  used = !MOI->isDead();
3064  if (used)
3065  break;
3066  continue;
3067  }
3068  unsigned realUses = 0;
3069  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
3070  EI = MRI.use_end();
3071  UI != EI; ++UI) {
3072  // Check if there are any uses that occur only in the original
3073  // loop. If so, that's not a real use.
3074  if (UI->getParent()->getParent() != BB) {
3075  realUses++;
3076  used = true;
3077  break;
3078  }
3079  }
3080  if (realUses > 0)
3081  break;
3082  used = false;
3083  }
3084  if (!used) {
3086  MI++->eraseFromParent();
3087  continue;
3088  }
3089  ++MI;
3090  }
3091  // In the kernel block, check if we can remove a Phi that generates a value
3092  // used in an instruction removed in the epilog block.
3093  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
3094  BBE = KernelBB->getFirstNonPHI();
3095  BBI != BBE;) {
3096  MachineInstr *MI = &*BBI;
3097  ++BBI;
3098  unsigned reg = MI->getOperand(0).getReg();
3099  if (MRI.use_begin(reg) == MRI.use_end()) {
3100  LIS.RemoveMachineInstrFromMaps(*MI);
3101  MI->eraseFromParent();
3102  }
3103  }
3104 }
3105 
3106 /// For loop carried definitions, we split the lifetime of a virtual register
3107 /// that has uses past the definition in the next iteration. A copy with a new
3108 /// virtual register is inserted before the definition, which helps with
3109 /// generating a better register assignment.
3110 ///
3111 /// v1 = phi(a, v2) v1 = phi(a, v2)
3112 /// v2 = phi(b, v3) v2 = phi(b, v3)
3113 /// v3 = .. v4 = copy v1
3114 /// .. = V1 v3 = ..
3115 /// .. = v4
3116 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
3117  MBBVectorTy &EpilogBBs,
3118  SMSchedule &Schedule) {
3120  for (auto &PHI : KernelBB->phis()) {
3121  unsigned Def = PHI.getOperand(0).getReg();
3122  // Check for any Phi definition that used as an operand of another Phi
3123  // in the same block.
3125  E = MRI.use_instr_end();
3126  I != E; ++I) {
3127  if (I->isPHI() && I->getParent() == KernelBB) {
3128  // Get the loop carried definition.
3129  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
3130  if (!LCDef)
3131  continue;
3132  MachineInstr *MI = MRI.getVRegDef(LCDef);
3133  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
3134  continue;
3135  // Search through the rest of the block looking for uses of the Phi
3136  // definition. If one occurs, then split the lifetime.
3137  unsigned SplitReg = 0;
3138  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
3139  KernelBB->instr_end()))
3140  if (BBJ.readsRegister(Def)) {
3141  // We split the lifetime when we find the first use.
3142  if (SplitReg == 0) {
3143  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
3144  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
3145  TII->get(TargetOpcode::COPY), SplitReg)
3146  .addReg(Def);
3147  }
3148  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
3149  }
3150  if (!SplitReg)
3151  continue;
3152  // Search through each of the epilog blocks for any uses to be renamed.
3153  for (auto &Epilog : EpilogBBs)
3154  for (auto &I : *Epilog)
3155  if (I.readsRegister(Def))
3156  I.substituteRegister(Def, SplitReg, 0, *TRI);
3157  break;
3158  }
3159  }
3160  }
3161 }
3162 
3163 /// Remove the incoming block from the Phis in a basic block.
3164 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
3165  for (MachineInstr &MI : *BB) {
3166  if (!MI.isPHI())
3167  break;
3168  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
3169  if (MI.getOperand(i + 1).getMBB() == Incoming) {
3170  MI.RemoveOperand(i + 1);
3171  MI.RemoveOperand(i);
3172  break;
3173  }
3174  }
3175 }
3176 
3177 /// Create branches from each prolog basic block to the appropriate epilog
3178 /// block. These edges are needed if the loop ends before reaching the
3179 /// kernel.
3180 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
3181  MachineBasicBlock *KernelBB,
3182  MBBVectorTy &EpilogBBs,
3183  SMSchedule &Schedule, ValueMapTy *VRMap) {
3184  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
3185  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
3186  MachineInstr *Cmp = Pass.LI.LoopCompare;
3187  MachineBasicBlock *LastPro = KernelBB;
3188  MachineBasicBlock *LastEpi = KernelBB;
3189 
3190  // Start from the blocks connected to the kernel and work "out"
3191  // to the first prolog and the last epilog blocks.
3193  unsigned MaxIter = PrologBBs.size() - 1;
3194  unsigned LC = UINT_MAX;
3195  unsigned LCMin = UINT_MAX;
3196  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
3197  // Add branches to the prolog that go to the corresponding
3198  // epilog, and the fall-thru prolog/kernel block.
3199  MachineBasicBlock *Prolog = PrologBBs[j];
3200  MachineBasicBlock *Epilog = EpilogBBs[i];
3201  // We've executed one iteration, so decrement the loop count and check for
3202  // the loop end.
3204  // Check if the LOOP0 has already been removed. If so, then there is no need
3205  // to reduce the trip count.
3206  if (LC != 0)
3207  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
3208  MaxIter);
3209 
3210  // Record the value of the first trip count, which is used to determine if
3211  // branches and blocks can be removed for constant trip counts.
3212  if (LCMin == UINT_MAX)
3213  LCMin = LC;
3214 
3215  unsigned numAdded = 0;
3217  Prolog->addSuccessor(Epilog);
3218  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
3219  } else if (j >= LCMin) {
3220  Prolog->addSuccessor(Epilog);
3221  Prolog->removeSuccessor(LastPro);
3222  LastEpi->removeSuccessor(Epilog);
3223  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
3224  removePhis(Epilog, LastEpi);
3225  // Remove the blocks that are no longer referenced.
3226  if (LastPro != LastEpi) {
3227  LastEpi->clear();
3228  LastEpi->eraseFromParent();
3229  }
3230  LastPro->clear();
3231  LastPro->eraseFromParent();
3232  } else {
3233  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
3234  removePhis(Epilog, Prolog);
3235  }
3236  LastPro = Prolog;
3237  LastEpi = Epilog;
3239  E = Prolog->instr_rend();
3240  I != E && numAdded > 0; ++I, --numAdded)
3241  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
3242  }
3243 }
3244 
3245 /// Return true if we can compute the amount the instruction changes
3246 /// during each iteration. Set Delta to the amount of the change.
3247 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
3249  unsigned BaseReg;
3250  int64_t Offset;
3251  if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
3252  return false;
3253 
3254  MachineRegisterInfo &MRI = MF.getRegInfo();
3255  // Check if there is a Phi. If so, get the definition in the loop.
3256  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
3257  if (BaseDef && BaseDef->isPHI()) {
3258  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
3259  BaseDef = MRI.getVRegDef(BaseReg);
3260  }
3261  if (!BaseDef)
3262  return false;
3263 
3264  int D = 0;
3265  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
3266  return false;
3267 
3268  Delta = D;
3269  return true;
3270 }
3271 
3272 /// Update the memory operand with a new offset when the pipeliner
3273 /// generates a new copy of the instruction that refers to a
3274 /// different memory location.
3275 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
3276  MachineInstr &OldMI, unsigned Num) {
3277  if (Num == 0)
3278  return;
3279  // If the instruction has memory operands, then adjust the offset
3280  // when the instruction appears in different stages.
3281  if (NewMI.memoperands_empty())
3282  return;
3284  for (MachineMemOperand *MMO : NewMI.memoperands()) {
3285  if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
3286  (!MMO->getValue())) {
3287  NewMMOs.push_back(MMO);
3288  continue;
3289  }
3290  unsigned Delta;
3291  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
3292  int64_t AdjOffset = Delta * Num;
3293  NewMMOs.push_back(
3294  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
3295  } else {
3296  NewMMOs.push_back(
3298  }
3299  }
3300  NewMI.setMemRefs(MF, NewMMOs);
3301 }
3302 
3303 /// Clone the instruction for the new pipelined loop and update the
3304 /// memory operands, if needed.
3305 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
3306  unsigned CurStageNum,
3307  unsigned InstStageNum) {
3308  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3309  // Check for tied operands in inline asm instructions. This should be handled
3310  // elsewhere, but I'm not sure of the best solution.
3311  if (OldMI->isInlineAsm())
3312  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
3313  const auto &MO = OldMI->getOperand(i);
3314  if (MO.isReg() && MO.isUse())
3315  break;
3316  unsigned UseIdx;
3317  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
3318  NewMI->tieOperands(i, UseIdx);
3319  }
3320  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3321  return NewMI;
3322 }
3323 
3324 /// Clone the instruction for the new pipelined loop. If needed, this
3325 /// function updates the instruction using the values saved in the
3326 /// InstrChanges structure.
3327 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
3328  unsigned CurStageNum,
3329  unsigned InstStageNum,
3330  SMSchedule &Schedule) {
3331  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
3333  InstrChanges.find(getSUnit(OldMI));
3334  if (It != InstrChanges.end()) {
3335  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3336  unsigned BasePos, OffsetPos;
3337  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
3338  return nullptr;
3339  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
3340  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
3341  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
3342  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
3343  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3344  }
3345  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
3346  return NewMI;
3347 }
3348 
3349 /// Update the machine instruction with new virtual registers. This
3350 /// function may change the defintions and/or uses.
3351 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
3352  unsigned CurStageNum,
3353  unsigned InstrStageNum,
3354  SMSchedule &Schedule,
3355  ValueMapTy *VRMap) {
3356  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
3357  MachineOperand &MO = NewMI->getOperand(i);
3359  continue;
3360  unsigned reg = MO.getReg();
3361  if (MO.isDef()) {
3362  // Create a new virtual register for the definition.
3363  const TargetRegisterClass *RC = MRI.getRegClass(reg);
3364  unsigned NewReg = MRI.createVirtualRegister(RC);
3365  MO.setReg(NewReg);
3366  VRMap[CurStageNum][reg] = NewReg;
3367  if (LastDef)
3368  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
3369  } else if (MO.isUse()) {
3370  MachineInstr *Def = MRI.getVRegDef(reg);
3371  // Compute the stage that contains the last definition for instruction.
3372  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
3373  unsigned StageNum = CurStageNum;
3374  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
3375  // Compute the difference in stages between the defintion and the use.
3376  unsigned StageDiff = (InstrStageNum - DefStageNum);
3377  // Make an adjustment to get the last definition.
3378  StageNum -= StageDiff;
3379  }
3380  if (VRMap[StageNum].count(reg))
3381  MO.setReg(VRMap[StageNum][reg]);
3382  }
3383  }
3384 }
3385 
3386 /// Return the instruction in the loop that defines the register.
3387 /// If the definition is a Phi, then follow the Phi operand to
3388 /// the instruction in the loop.
3389 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
3391  MachineInstr *Def = MRI.getVRegDef(Reg);
3392  while (Def->isPHI()) {
3393  if (!Visited.insert(Def).second)
3394  break;
3395  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
3396  if (Def->getOperand(i + 1).getMBB() == BB) {
3397  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
3398  break;
3399  }
3400  }
3401  return Def;
3402 }
3403 
3404 /// Return the new name for the value from the previous stage.
3405 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
3406  unsigned LoopVal, unsigned LoopStage,
3407  ValueMapTy *VRMap,
3408  MachineBasicBlock *BB) {
3409  unsigned PrevVal = 0;
3410  if (StageNum > PhiStage) {
3411  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
3412  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
3413  // The name is defined in the previous stage.
3414  PrevVal = VRMap[StageNum - 1][LoopVal];
3415  else if (VRMap[StageNum].count(LoopVal))
3416  // The previous name is defined in the current stage when the instruction
3417  // order is swapped.
3418  PrevVal = VRMap[StageNum][LoopVal];
3419  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
3420  // The loop value hasn't yet been scheduled.
3421  PrevVal = LoopVal;
3422  else if (StageNum == PhiStage + 1)
3423  // The loop value is another phi, which has not been scheduled.
3424  PrevVal = getInitPhiReg(*LoopInst, BB);
3425  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
3426  // The loop value is another phi, which has been scheduled.
3427  PrevVal =
3428  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
3429  LoopStage, VRMap, BB);
3430  }
3431  return PrevVal;
3432 }
3433 
3434 /// Rewrite the Phi values in the specified block to use the mappings
3435 /// from the initial operand. Once the Phi is scheduled, we switch
3436 /// to using the loop value instead of the Phi value, so those names
3437 /// do not need to be rewritten.
3438 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
3439  unsigned StageNum,
3440  SMSchedule &Schedule,
3441  ValueMapTy *VRMap,
3442  InstrMapTy &InstrMap) {
3443  for (auto &PHI : BB->phis()) {
3444  unsigned InitVal = 0;
3445  unsigned LoopVal = 0;
3446  getPhiRegs(PHI, BB, InitVal, LoopVal);
3447  unsigned PhiDef = PHI.getOperand(0).getReg();
3448 
3449  unsigned PhiStage =
3450  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
3451  unsigned LoopStage =
3452  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
3453  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
3454  if (NumPhis > StageNum)
3455  NumPhis = StageNum;
3456  for (unsigned np = 0; np <= NumPhis; ++np) {
3457  unsigned NewVal =
3458  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
3459  if (!NewVal)
3460  NewVal = InitVal;
3461  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
3462  PhiDef, NewVal);
3463  }
3464  }
3465 }
3466 
3467 /// Rewrite a previously scheduled instruction to use the register value
3468 /// from the new instruction. Make sure the instruction occurs in the
3469 /// basic block, and we don't change the uses in the new instruction.
3470 void SwingSchedulerDAG::rewriteScheduledInstr(
3471  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
3472  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
3473  unsigned NewReg, unsigned PrevReg) {
3474  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
3475  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
3476  // Rewrite uses that have been scheduled already to use the new
3477  // Phi register.
3478  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
3479  EI = MRI.use_end();
3480  UI != EI;) {
3481  MachineOperand &UseOp = *UI;
3482  MachineInstr *UseMI = UseOp.getParent();
3483  ++UI;
3484  if (UseMI->getParent() != BB)
3485  continue;
3486  if (UseMI->isPHI()) {
3487  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
3488  continue;
3489  if (getLoopPhiReg(*UseMI, BB) != OldReg)
3490  continue;
3491  }
3492  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
3493  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
3494  SUnit *OrigMISU = getSUnit(OrigInstr->second);
3495  int StageSched = Schedule.stageScheduled(OrigMISU);
3496  int CycleSched = Schedule.cycleScheduled(OrigMISU);
3497  unsigned ReplaceReg = 0;
3498  // This is the stage for the scheduled instruction.
3499  if (StagePhi == StageSched && Phi->isPHI()) {
3500  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
3501  if (PrevReg && InProlog)
3502  ReplaceReg = PrevReg;
3503  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
3504  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
3505  ReplaceReg = PrevReg;
3506  else
3507  ReplaceReg = NewReg;
3508  }
3509  // The scheduled instruction occurs before the scheduled Phi, and the
3510  // Phi is not loop carried.
3511  if (!InProlog && StagePhi + 1 == StageSched &&
3512  !Schedule.isLoopCarried(this, *Phi))
3513  ReplaceReg = NewReg;
3514  if (StagePhi > StageSched && Phi->isPHI())
3515  ReplaceReg = NewReg;
3516  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3517  ReplaceReg = NewReg;
3518  if (ReplaceReg) {
3519  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3520  UseOp.setReg(ReplaceReg);
3521  }
3522  }
3523 }
3524 
3525 /// Check if we can change the instruction to use an offset value from the
3526 /// previous iteration. If so, return true and set the base and offset values
3527 /// so that we can rewrite the load, if necessary.
3528 /// v1 = Phi(v0, v3)
3529 /// v2 = load v1, 0
3530 /// v3 = post_store v1, 4, x
3531 /// This function enables the load to be rewritten as v2 = load v3, 4.
3532 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3533  unsigned &BasePos,
3534  unsigned &OffsetPos,
3535  unsigned &NewBase,
3536  int64_t &Offset) {
3537  // Get the load instruction.
3538  if (TII->isPostIncrement(*MI))
3539  return false;
3540  unsigned BasePosLd, OffsetPosLd;
3541  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3542  return false;
3543  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3544 
3545  // Look for the Phi instruction.
3546  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
3547  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3548  if (!Phi || !Phi->isPHI())
3549  return false;
3550  // Get the register defined in the loop block.
3551  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3552  if (!PrevReg)
3553  return false;
3554 
3555  // Check for the post-increment load/store instruction.
3556  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3557  if (!PrevDef || PrevDef == MI)
3558  return false;
3559 
3560  if (!TII->isPostIncrement(*PrevDef))
3561  return false;
3562 
3563  unsigned BasePos1 = 0, OffsetPos1 = 0;
3564  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3565  return false;
3566 
3567  // Make sure that the instructions do not access the same memory location in
3568  // the next iteration.
3569  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3570  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3571  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3572  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3573  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3574  MF.DeleteMachineInstr(NewMI);
3575  if (!Disjoint)
3576  return false;
3577 
3578  // Set the return value once we determine that we return true.
3579  BasePos = BasePosLd;
3580  OffsetPos = OffsetPosLd;
3581  NewBase = PrevReg;
3582  Offset = StoreOffset;
3583  return true;
3584 }
3585 
3586 /// Apply changes to the instruction if needed. The changes are need
3587 /// to improve the scheduling and depend up on the final schedule.
3588 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
3589  SMSchedule &Schedule) {
3590  SUnit *SU = getSUnit(MI);
3592  InstrChanges.find(SU);
3593  if (It != InstrChanges.end()) {
3594  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3595  unsigned BasePos, OffsetPos;
3596  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3597  return;
3598  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3599  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3600  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3601  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3602  int BaseStageNum = Schedule.stageScheduled(SU);
3603  int BaseCycleNum = Schedule.cycleScheduled(SU);
3604  if (BaseStageNum < DefStageNum) {
3605  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3606  int OffsetDiff = DefStageNum - BaseStageNum;
3607  if (DefCycleNum < BaseCycleNum) {
3608  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3609  if (OffsetDiff > 0)
3610  --OffsetDiff;
3611  }
3612  int64_t NewOffset =
3613  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3614  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3615  SU->setInstr(NewMI);
3616  MISUnitMap[NewMI] = SU;
3617  NewMIs.insert(NewMI);
3618  }
3619  }
3620 }
3621 
3622 /// Return true for an order or output dependence that is loop carried
3623 /// potentially. A dependence is loop carried if the destination defines a valu
3624 /// that may be used or defined by the source in a subsequent iteration.
3625 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
3626  bool isSucc) {
3627  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3628  Dep.isArtificial())
3629  return false;
3630 
3631  if (!SwpPruneLoopCarried)
3632  return true;
3633 
3634  if (Dep.getKind() == SDep::Output)
3635  return true;
3636 
3637  MachineInstr *SI = Source->getInstr();
3638  MachineInstr *DI = Dep.getSUnit()->getInstr();
3639  if (!isSucc)
3640  std::swap(SI, DI);
3641  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3642 
3643  // Assume ordered loads and stores may have a loop carried dependence.
3644  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3646  return true;
3647 
3648  // Only chain dependences between a load and store can be loop carried.
3649  if (!DI->mayStore() || !SI->mayLoad())
3650  return false;
3651 
3652  unsigned DeltaS, DeltaD;
3653  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3654  return true;
3655 
3656  unsigned BaseRegS, BaseRegD;
3657  int64_t OffsetS, OffsetD;
3659  if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
3660  !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
3661  return true;
3662 
3663  if (BaseRegS != BaseRegD)
3664  return true;
3665 
3666  // Check that the base register is incremented by a constant value for each
3667  // iteration.
3668  MachineInstr *Def = MRI.getVRegDef(BaseRegS);
3669  if (!Def || !Def->isPHI())
3670  return true;
3671  unsigned InitVal = 0;
3672  unsigned LoopVal = 0;
3673  getPhiRegs(*Def, BB, InitVal, LoopVal);
3674  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3675  int D = 0;
3676  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3677  return true;
3678 
3679  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3680  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3681 
3682  // This is the main test, which checks the offset values and the loop
3683  // increment value to determine if the accesses may be loop carried.
3684  if (OffsetS >= OffsetD)
3685  return OffsetS + AccessSizeS > DeltaS;
3686  else
3687  return OffsetD + AccessSizeD > DeltaD;
3688 
3689  return true;
3690 }
3691 
3692 void SwingSchedulerDAG::postprocessDAG() {
3693  for (auto &M : Mutations)
3694  M->apply(this);
3695 }
3696 
3697 /// Try to schedule the node at the specified StartCycle and continue
3698 /// until the node is schedule or the EndCycle is reached. This function
3699 /// returns true if the node is scheduled. This routine may search either
3700 /// forward or backward for a place to insert the instruction based upon
3701 /// the relative values of StartCycle and EndCycle.
3702 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3703  bool forward = true;
3704  if (StartCycle > EndCycle)
3705  forward = false;
3706 
3707  // The terminating condition depends on the direction.
3708  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3709  for (int curCycle = StartCycle; curCycle != termCycle;
3710  forward ? ++curCycle : --curCycle) {
3711 
3712  // Add the already scheduled instructions at the specified cycle to the DFA.
3713  Resources->clearResources();
3714  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3715  checkCycle <= LastCycle; checkCycle += II) {
3716  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3717 
3718  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3719  E = cycleInstrs.end();
3720  I != E; ++I) {
3721  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3722  continue;
3723  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3724  "These instructions have already been scheduled.");
3725  Resources->reserveResources(*(*I)->getInstr());
3726  }
3727  }
3728  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3729  Resources->canReserveResources(*SU->getInstr())) {
3730  LLVM_DEBUG({
3731  dbgs() << "\tinsert at cycle " << curCycle << " ";
3732  SU->getInstr()->dump();
3733  });
3734 
3735  ScheduledInstrs[curCycle].push_back(SU);
3736  InstrToCycle.insert(std::make_pair(SU, curCycle));
3737  if (curCycle > LastCycle)
3738  LastCycle = curCycle;
3739  if (curCycle < FirstCycle)
3740  FirstCycle = curCycle;
3741  return true;
3742  }
3743  LLVM_DEBUG({
3744  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3745  SU->getInstr()->dump();
3746  });
3747  }
3748  return false;
3749 }
3750 
3751 // Return the cycle of the earliest scheduled instruction in the chain.
3752 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
3753  SmallPtrSet<SUnit *, 8> Visited;
3754  SmallVector<SDep, 8> Worklist;
3755  Worklist.push_back(Dep);
3756  int EarlyCycle = INT_MAX;
3757  while (!Worklist.empty()) {
3758  const SDep &Cur = Worklist.pop_back_val();
3759  SUnit *PrevSU = Cur.getSUnit();
3760  if (Visited.count(PrevSU))
3761  continue;
3762  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3763  if (it == InstrToCycle.end())
3764  continue;
3765  EarlyCycle = std::min(EarlyCycle, it->second);
3766  for (const auto &PI : PrevSU->Preds)
3767  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3768  Worklist.push_back(PI);
3769  Visited.insert(PrevSU);
3770  }
3771  return EarlyCycle;
3772 }
3773 
3774 // Return the cycle of the latest scheduled instruction in the chain.
3775 int SMSchedule::latestCycleInChain(const SDep &Dep) {
3776  SmallPtrSet<SUnit *, 8> Visited;
3777  SmallVector<SDep, 8> Worklist;
3778  Worklist.push_back(Dep);
3779  int LateCycle = INT_MIN;
3780  while (!Worklist.empty()) {
3781  const SDep &Cur = Worklist.pop_back_val();
3782  SUnit *SuccSU = Cur.getSUnit();
3783  if (Visited.count(SuccSU))
3784  continue;
3785  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3786  if (it == InstrToCycle.end())
3787  continue;
3788  LateCycle = std::max(LateCycle, it->second);
3789  for (const auto &SI : SuccSU->Succs)
3790  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3791  Worklist.push_back(SI);
3792  Visited.insert(SuccSU);
3793  }
3794  return LateCycle;
3795 }
3796 
3797 /// If an instruction has a use that spans multiple iterations, then
3798 /// return true. These instructions are characterized by having a back-ege
3799 /// to a Phi, which contains a reference to another Phi.
3800 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
3801  for (auto &P : SU->Preds)
3802  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3803  for (auto &S : P.getSUnit()->Succs)
3804  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3805  return P.getSUnit();
3806  return nullptr;
3807 }
3808 
3809 /// Compute the scheduling start slot for the instruction. The start slot
3810 /// depends on any predecessor or successor nodes scheduled already.
3811 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3812  int *MinEnd, int *MaxStart, int II,
3813  SwingSchedulerDAG *DAG) {
3814  // Iterate over each instruction that has been scheduled already. The start
3815  // slot computation depends on whether the previously scheduled instruction
3816  // is a predecessor or successor of the specified instruction.
3817  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3818 
3819  // Iterate over each instruction in the current cycle.
3820  for (SUnit *I : getInstructions(cycle)) {
3821  // Because we're processing a DAG for the dependences, we recognize
3822  // the back-edge in recurrences by anti dependences.
3823  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3824  const SDep &Dep = SU->Preds[i];
3825  if (Dep.getSUnit() == I) {
3826  if (!DAG->isBackedge(SU, Dep)) {
3827  int EarlyStart = cycle + Dep.getLatency() -
3828  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3829  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3830  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3831  int End = earliestCycleInChain(Dep) + (II - 1);
3832  *MinEnd = std::min(*MinEnd, End);
3833  }
3834  } else {
3835  int LateStart = cycle - Dep.getLatency() +
3836  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3837  *MinLateStart = std::min(*MinLateStart, LateStart);
3838  }
3839  }
3840  // For instruction that requires multiple iterations, make sure that
3841  // the dependent instruction is not scheduled past the definition.
3842  SUnit *BE = multipleIterations(I, DAG);
3843  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3844  !SU->isPred(I))
3845  *MinLateStart = std::min(*MinLateStart, cycle);
3846  }
3847  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3848  if (SU->Succs[i].getSUnit() == I) {
3849  const SDep &Dep = SU->Succs[i];
3850  if (!DAG->isBackedge(SU, Dep)) {
3851  int LateStart = cycle - Dep.getLatency() +
3852  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3853  *MinLateStart = std::min(*MinLateStart, LateStart);
3854  if (DAG->isLoopCarriedDep(SU, Dep)) {
3855  int Start = latestCycleInChain(Dep) + 1 - II;
3856  *MaxStart = std::max(*MaxStart, Start);
3857  }
3858  } else {
3859  int EarlyStart = cycle + Dep.getLatency() -
3860  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3861  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3862  }
3863  }
3864  }
3865  }
3866  }
3867 }
3868 
3869 /// Order the instructions within a cycle so that the definitions occur
3870 /// before the uses. Returns true if the instruction is added to the start
3871 /// of the list, or false if added to the end.
3872 void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
3873  std::deque<SUnit *> &Insts) {
3874  MachineInstr *MI = SU->getInstr();
3875  bool OrderBeforeUse = false;
3876  bool OrderAfterDef = false;
3877  bool OrderBeforeDef = false;
3878  unsigned MoveDef = 0;
3879  unsigned MoveUse = 0;
3880  int StageInst1 = stageScheduled(SU);
3881 
3882  unsigned Pos = 0;
3883  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3884  ++I, ++Pos) {
3885  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3886  MachineOperand &MO = MI->getOperand(i);
3888  continue;
3889 
3890  unsigned Reg = MO.getReg();
3891  unsigned BasePos, OffsetPos;
3892  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3893  if (MI->getOperand(BasePos).getReg() == Reg)
3894  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3895  Reg = NewReg;
3896  bool Reads, Writes;
3897  std::tie(Reads, Writes) =
3898  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3899  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3900  OrderBeforeUse = true;
3901  if (MoveUse == 0)
3902  MoveUse = Pos;
3903  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3904  // Add the instruction after the scheduled instruction.
3905  OrderAfterDef = true;
3906  MoveDef = Pos;
3907  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3908  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3909  OrderBeforeUse = true;
3910  if (MoveUse == 0)
3911  MoveUse = Pos;
3912  } else {
3913  OrderAfterDef = true;
3914  MoveDef = Pos;
3915  }
3916  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3917  OrderBeforeUse = true;
3918  if (MoveUse == 0)
3919  MoveUse = Pos;
3920  if (MoveUse != 0) {
3921  OrderAfterDef = true;
3922  MoveDef = Pos - 1;
3923  }
3924  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3925  // Add the instruction before the scheduled instruction.
3926  OrderBeforeUse = true;
3927  if (MoveUse == 0)
3928  MoveUse = Pos;
3929  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3930  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3931  if (MoveUse == 0) {
3932  OrderBeforeDef = true;
3933  MoveUse = Pos;
3934  }
3935  }
3936  }
3937  // Check for order dependences between instructions. Make sure the source
3938  // is ordered before the destination.
3939  for (auto &S : SU->Succs) {
3940  if (S.getSUnit() != *I)
3941  continue;
3942  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3943  OrderBeforeUse = true;
3944  if (Pos < MoveUse)
3945  MoveUse = Pos;
3946  }
3947  }
3948  for (auto &P : SU->Preds) {
3949  if (P.getSUnit() != *I)
3950  continue;
3951  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3952  OrderAfterDef = true;
3953  MoveDef = Pos;
3954  }
3955  }
3956  }
3957 
3958  // A circular dependence.
3959  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3960  OrderBeforeUse = false;
3961 
3962  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3963  // to a loop-carried dependence.
3964  if (OrderBeforeDef)
3965  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3966 
3967  // The uncommon case when the instruction order needs to be updated because
3968  // there is both a use and def.
3969  if (OrderBeforeUse && OrderAfterDef) {
3970  SUnit *UseSU = Insts.at(MoveUse);
3971  SUnit *DefSU = Insts.at(MoveDef);
3972  if (MoveUse > MoveDef) {
3973  Insts.erase(Insts.begin() + MoveUse);
3974  Insts.erase(Insts.begin() + MoveDef);
3975  } else {
3976  Insts.erase(Insts.begin() + MoveDef);
3977  Insts.erase(Insts.begin() + MoveUse);
3978  }
3979  orderDependence(SSD, UseSU, Insts);
3980  orderDependence(SSD, SU, Insts);
3981  orderDependence(SSD, DefSU, Insts);
3982  return;
3983  }
3984  // Put the new instruction first if there is a use in the list. Otherwise,
3985  // put it at the end of the list.
3986  if (OrderBeforeUse)
3987  Insts.push_front(SU);
3988  else
3989  Insts.push_back(SU);
3990 }
3991 
3992 /// Return true if the scheduled Phi has a loop carried operand.
3993 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
3994  if (!Phi.isPHI())
3995  return false;
3996  assert(Phi.isPHI() && "Expecting a Phi.");
3997  SUnit *DefSU = SSD->getSUnit(&Phi);
3998  unsigned DefCycle = cycleScheduled(DefSU);
3999  int DefStage = stageScheduled(DefSU);
4000 
4001  unsigned InitVal = 0;
4002  unsigned LoopVal = 0;
4003  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
4004  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
4005  if (!UseSU)
4006  return true;
4007  if (UseSU->getInstr()->isPHI())
4008  return true;
4009  unsigned LoopCycle = cycleScheduled(UseSU);
4010  int LoopStage = stageScheduled(UseSU);
4011  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
4012 }
4013 
4014 /// Return true if the instruction is a definition that is loop carried
4015 /// and defines the use on the next iteration.
4016 /// v1 = phi(v2, v3)
4017 /// (Def) v3 = op v1
4018 /// (MO) = v1
4019 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
4020 /// register.
4021 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
4023  if (!MO.isReg())
4024  return false;
4025  if (Def->isPHI())
4026  return false;
4027  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
4028  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
4029  return false;
4030  if (!isLoopCarried(SSD, *Phi))
4031  return false;
4032  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
4033  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
4034  MachineOperand &DMO = Def->getOperand(i);
4035  if (!DMO.isReg() || !DMO.isDef())
4036  continue;
4037  if (DMO.getReg() == LoopReg)
4038  return true;
4039  }
4040  return false;
4041 }
4042 
4043 // Check if the generated schedule is valid. This function checks if
4044 // an instruction that uses a physical register is scheduled in a
4045 // different stage than the definition. The pipeliner does not handle
4046 // physical register values that may cross a basic block boundary.
4047 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
4048  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
4049  SUnit &SU = SSD->SUnits[i];
4050  if (!SU.hasPhysRegDefs)
4051  continue;
4052  int StageDef = stageScheduled(&SU);
4053  assert(StageDef != -1 && "Instruction should have been scheduled.");
4054  for (auto &SI : SU.Succs)
4055  if (SI.isAssignedRegDep())
4056  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
4057  if (stageScheduled(SI.getSUnit()) != StageDef)
4058  return false;
4059  }
4060  return true;
4061 }
4062 
4063 /// A property of the node order in swing-modulo-scheduling is
4064 /// that for nodes outside circuits the following holds:
4065 /// none of them is scheduled after both a successor and a
4066 /// predecessor.
4067 /// The method below checks whether the property is met.
4068 /// If not, debug information is printed and statistics information updated.
4069 /// Note that we do not use an assert statement.
4070 /// The reason is that although an invalid node oder may prevent
4071 /// the pipeliner from finding a pipelined schedule for arbitrary II,
4072 /// it does not lead to the generation of incorrect code.
4073 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
4074 
4075  // a sorted vector that maps each SUnit to its index in the NodeOrder
4076  typedef std::pair<SUnit *, unsigned> UnitIndex;
4077  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
4078 
4079  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
4080  Indices.push_back(std::make_pair(NodeOrder[i], i));
4081 
4082  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
4083  return std::get<0>(i1) < std::get<0>(i2);
4084  };
4085 
4086  // sort, so that we can perform a binary search
4087  llvm::sort(Indices, CompareKey);
4088 
4089  bool Valid = true;
4090  (void)Valid;
4091  // for each SUnit in the NodeOrder, check whether
4092  // it appears after both a successor and a predecessor
4093  // of the SUnit. If this is the case, and the SUnit
4094  // is not part of circuit, then the NodeOrder is not
4095  // valid.
4096  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
4097  SUnit *SU = NodeOrder[i];
4098  unsigned Index = i;
4099 
4100  bool PredBefore = false;
4101  bool SuccBefore = false;
4102 
4103  SUnit *Succ;
4104  SUnit *Pred;
4105  (void)Succ;
4106  (void)Pred;
4107 
4108  for (SDep &PredEdge : SU->Preds) {
4109  SUnit *PredSU = PredEdge.getSUnit();
4110  unsigned PredIndex =
4111  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4112  std::make_pair(PredSU, 0), CompareKey));
4113  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
4114  PredBefore = true;
4115  Pred = PredSU;
4116  break;
4117  }
4118  }
4119 
4120  for (SDep &SuccEdge : SU->Succs) {
4121  SUnit *SuccSU = SuccEdge.getSUnit();
4122  unsigned SuccIndex =
4123  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
4124  std::make_pair(SuccSU, 0), CompareKey));
4125  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
4126  SuccBefore = true;
4127  Succ = SuccSU;
4128  break;
4129  }
4130  }
4131 
4132  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
4133  // instructions in circuits are allowed to be scheduled
4134  // after both a successor and predecessor.
4135  bool InCircuit = std::any_of(
4136  Circuits.begin(), Circuits.end(),
4137  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
4138  if (InCircuit)
4139  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
4140  else {
4141  Valid = false;
4142  NumNodeOrderIssues++;
4143  LLVM_DEBUG(dbgs() << "Predecessor ";);
4144  }
4145  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
4146  << " are scheduled before node " << SU->NodeNum
4147  << "\n";);
4148  }
4149  }
4150 
4151  LLVM_DEBUG({
4152  if (!Valid)
4153  dbgs() << "Invalid node order found!\n";
4154  });
4155 }
4156 
4157 /// Attempt to fix the degenerate cases when the instruction serialization
4158 /// causes the register lifetimes to overlap. For example,
4159 /// p' = store_pi(p, b)
4160 /// = load p, offset
4161 /// In this case p and p' overlap, which means that two registers are needed.
4162 /// Instead, this function changes the load to use p' and updates the offset.
4163 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
4164  unsigned OverlapReg = 0;
4165  unsigned NewBaseReg = 0;
4166  for (SUnit *SU : Instrs) {
4167  MachineInstr *MI = SU->getInstr();
4168  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4169  const MachineOperand &MO = MI->getOperand(i);
4170  // Look for an instruction that uses p. The instruction occurs in the
4171  // same cycle but occurs later in the serialized order.
4172  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
4173  // Check that the instruction appears in the InstrChanges structure,
4174  // which contains instructions that can have the offset updated.
4176  InstrChanges.find(SU);
4177  if (It != InstrChanges.end()) {
4178  unsigned BasePos, OffsetPos;
4179  // Update the base register and adjust the offset.
4180  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
4181  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
4182  NewMI->getOperand(BasePos).setReg(NewBaseReg);
4183  int64_t NewOffset =
4184  MI->getOperand(OffsetPos).getImm() - It->second.second;
4185  NewMI->getOperand(OffsetPos).setImm(NewOffset);
4186  SU->setInstr(NewMI);
4187  MISUnitMap[NewMI] = SU;
4188  NewMIs.insert(NewMI);
4189  }
4190  }
4191  OverlapReg = 0;
4192  NewBaseReg = 0;
4193  break;
4194  }
4195  // Look for an instruction of the form p' = op(p), which uses and defines
4196  // two virtual registers that get allocated to the same physical register.
4197  unsigned TiedUseIdx = 0;
4198  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
4199  // OverlapReg is p in the example above.
4200  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
4201  // NewBaseReg is p' in the example above.
4202  NewBaseReg = MI->getOperand(i).getReg();
4203  break;
4204  }
4205  }
4206  }
4207 }
4208 
4209 /// After the schedule has been formed, call this function to combine
4210 /// the instructions from the different stages/cycles. That is, this
4211 /// function creates a schedule that represents a single iteration.
4212 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
4213  // Move all instructions to the first stage from later stages.
4214  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4215  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
4216  ++stage) {
4217  std::deque<SUnit *> &cycleInstrs =
4218  ScheduledInstrs[cycle + (stage * InitiationInterval)];
4219  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
4220  E = cycleInstrs.rend();
4221  I != E; ++I)
4222  ScheduledInstrs[cycle].push_front(*I);
4223  }
4224  }
4225  // Iterate over the definitions in each instruction, and compute the
4226  // stage difference for each use. Keep the maximum value.
4227  for (auto &I : InstrToCycle) {
4228  int DefStage = stageScheduled(I.first);
4229  MachineInstr *MI = I.first->getInstr();
4230  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
4231  MachineOperand &Op = MI->getOperand(i);
4232  if (!Op.isReg() || !Op.isDef())
4233  continue;
4234 
4235  unsigned Reg = Op.getReg();
4236  unsigned MaxDiff = 0;
4237  bool PhiIsSwapped = false;
4238  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
4239  EI = MRI.use_end();
4240  UI != EI; ++UI) {
4241  MachineOperand &UseOp = *UI;
4242  MachineInstr *UseMI = UseOp.getParent();
4243  SUnit *SUnitUse = SSD->getSUnit(UseMI);
4244  int UseStage = stageScheduled(SUnitUse);
4245  unsigned Diff = 0;
4246  if (UseStage != -1 && UseStage >= DefStage)
4247  Diff = UseStage - DefStage;
4248  if (MI->isPHI()) {
4249  if (isLoopCarried(SSD, *MI))
4250  ++Diff;
4251  else
4252  PhiIsSwapped = true;
4253  }
4254  MaxDiff = std::max(Diff, MaxDiff);
4255  }
4256  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
4257  }
4258  }
4259 
4260  // Erase all the elements in the later stages. Only one iteration should
4261  // remain in the scheduled list, and it contains all the instructions.
4262  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
4263  ScheduledInstrs.erase(cycle);
4264 
4265  // Change the registers in instruction as specified in the InstrChanges
4266  // map. We need to use the new registers to create the correct order.
4267  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
4268  SUnit *SU = &SSD->SUnits[i];
4269  SSD->applyInstrChange(SU->getInstr(), *this);
4270  }
4271 
4272  // Reorder the instructions in each cycle to fix and improve the
4273  // generated code.
4274  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
4275  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
4276  std::deque<SUnit *> newOrderPhi;
4277  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4278  SUnit *SU = cycleInstrs[i];
4279  if (SU->getInstr()->isPHI())
4280  newOrderPhi.push_back(SU);
4281  }
4282  std::deque<SUnit *> newOrderI;
4283  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
4284  SUnit *SU = cycleInstrs[i];
4285  if (!SU->getInstr()->isPHI())
4286  orderDependence(SSD, SU, newOrderI);
4287  }
4288  // Replace the old order with the new order.
4289  cycleInstrs.swap(newOrderPhi);
4290  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
4291  SSD->fixupRegisterOverlaps(cycleInstrs);
4292  }
4293 
4294  LLVM_DEBUG(dump(););
4295 }
4296 
4297 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4298 /// Print the schedule information to the given output.
4299 void SMSchedule::print(raw_ostream &os) const {
4300  // Iterate over each cycle.
4301  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
4302  // Iterate over each instruction in the cycle.
4303  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
4304  for (SUnit *CI : cycleInstrs->second) {
4305  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
4306  os << "(" << CI->NodeNum << ") ";
4307  CI->getInstr()->print(os);
4308  os << "\n";
4309  }
4310  }
4311 }
4312 
4313 /// Utility function used for debugging to print the schedule.
4314 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
4315 #endif
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:754
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:259
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
BitVector & set()
Definition: BitVector.h:398
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:454
A common definition of LaneBitmask for use in TableGen and CodeGen.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:89
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:633
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:250
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:270
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
void RemoveMachineInstrFromMaps(MachineInstr &MI)
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:383
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
Definition: ScheduleDAG.h:402
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
bool isInlineAsm() const
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:85
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
bool test(unsigned Idx) const
Definition: BitVector.h:502
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
Mutate the DAG as a postpass after normal DAG building.
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:299
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:34
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
block Block Frequency true
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:219
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:53
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:459
bool isPHI() const
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:93
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:264
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:260
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:55
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:263
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:143
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:412
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:370
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:265
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:54
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1182
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:284
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
#define LLVM_DUMP_METHOD
Definition: Compiler.h:74
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
void pop_back()
Remove the last element of the SetVector.
Definition: SetVector.h:222
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:100
void assign(size_type NumElts, const T &Elt)
Definition: SmallVector.h:423
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
PowerPC VSX FMA Mutation
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:56
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:406
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
SlotIndexes pass.
Definition: SlotIndexes.h:331
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1144
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
virtual const TargetInstrInfo * getInstrInfo() const
SUnit * getSUnit() const
Definition: ScheduleDAG.h:484
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:148
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst)
Gets latency information for Inst, based on DC information.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
const TargetRegisterClass * constrainRegClass(unsigned Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:348
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:211
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1114
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
TargetInstrInfo - Interface to description of machine instruction set.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:79
const Value * getValue() const
Return the base address of the memory access.
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:50
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:583
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:571
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:820
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:419
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:377
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:201
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:516
virtual bool getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base register and byte offset of an instruction that reads/writes memory. ...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:129
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
Represent the analysis usage information of a pass.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:161
use_instr_iterator use_instr_begin(unsigned RegNo) const
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:549
BitVector & reset()
Definition: BitVector.h:439
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1055
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:181
void runOnMachineFunction(const MachineFunction &MF)
runOnFunction - Prepare to answer questions about MF.
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1088
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:194
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:57
size_t size() const
Definition: SmallVector.h:53
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
typename vector_type::const_iterator const_iterator
Definition: SetVector.h:50
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void push_back(bool Val)
Definition: BitVector.h:507
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:978
hexagon gen pred
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:200
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:70
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
typename vector_type::const_iterator iterator
Definition: SetVector.h:49
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
print lazy value Lazy Value Info Printer Pass
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:520
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1029
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:378
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:534
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
MachineInstr * getUniqueVRegDef(unsigned Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:747
void clear()
Completely clear the SetVector.
Definition: SetVector.h:216
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:327
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:266
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:212
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:148
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:254
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
void initializeMachinePipelinerPass(PassRegistry &)
TargetSubtargetInfo - Generic base class for all target subtargets.
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
Definition: ScheduleDAG.h:410
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1969
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:64
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:163
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
iterator begin() const
Definition: SmallPtrSet.h:397
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:45
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:490
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
iterator end() const
Definition: SmallPtrSet.h:402
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:268
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:807
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:546
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:72
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:453
A vector that has set insertion semantics.
Definition: SetVector.h:41
static use_instr_iterator use_instr_end()
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:261
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
Definition: SetVector.h:200
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:73
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:46
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
void setPos(MachineBasicBlock::const_iterator Pos)
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1967
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:566
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
#define LLVM_DEBUG(X)
Definition: Debug.h:123
This class can compute a topological ordering for SUnits and provides methods for dynamically updatin...
Definition: ScheduleDAG.h:693
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
static cl::opt< bool > SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable CopyToPhi DAG Mutation"))
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:246
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:435