LLVM  9.0.0svn
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
65 #include "llvm/Config/llvm-config.h"
66 #include "llvm/IR/Attributes.h"
67 #include "llvm/IR/DebugLoc.h"
68 #include "llvm/IR/Function.h"
69 #include "llvm/MC/LaneBitmask.h"
70 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCRegisterInfo.h"
73 #include "llvm/Pass.h"
75 #include "llvm/Support/Compiler.h"
76 #include "llvm/Support/Debug.h"
79 #include <algorithm>
80 #include <cassert>
81 #include <climits>
82 #include <cstdint>
83 #include <deque>
84 #include <functional>
85 #include <iterator>
86 #include <map>
87 #include <memory>
88 #include <tuple>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEBUG_TYPE "pipeliner"
95 
96 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
97 STATISTIC(NumPipelined, "Number of loops software pipelined");
98 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
99 
100 /// A command line option to turn software pipelining on or off.
101 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
103  cl::desc("Enable Software Pipelining"));
104 
105 /// A command line option to enable SWP at -Os.
106 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
107  cl::desc("Enable SWP at Os."), cl::Hidden,
108  cl::init(false));
109 
110 /// A command line argument to limit minimum initial interval for pipelining.
111 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
112  cl::desc("Size limit for the MII."),
113  cl::Hidden, cl::init(27));
114 
115 /// A command line argument to limit the number of stages in the pipeline.
116 static cl::opt<int>
117  SwpMaxStages("pipeliner-max-stages",
118  cl::desc("Maximum stages allowed in the generated scheduled."),
119  cl::Hidden, cl::init(3));
120 
121 /// A command line option to disable the pruning of chain dependences due to
122 /// an unrelated Phi.
123 static cl::opt<bool>
124  SwpPruneDeps("pipeliner-prune-deps",
125  cl::desc("Prune dependences between unrelated Phi nodes."),
126  cl::Hidden, cl::init(true));
127 
128 /// A command line option to disable the pruning of loop carried order
129 /// dependences.
130 static cl::opt<bool>
131  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
132  cl::desc("Prune loop carried order dependences."),
133  cl::Hidden, cl::init(true));
134 
135 #ifndef NDEBUG
136 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
137 #endif
138 
139 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
140  cl::ReallyHidden, cl::init(false),
141  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
142 
143 namespace llvm {
144 
145 // A command line option to enable the CopyToPhi DAG mutation.
147  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
148  cl::init(true), cl::ZeroOrMore,
149  cl::desc("Enable CopyToPhi DAG Mutation"));
150 
151 } // end namespace llvm
152 
153 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
154 char MachinePipeliner::ID = 0;
155 #ifndef NDEBUG
157 #endif
159 
161  "Modulo Software Pipelining", false, false)
167  "Modulo Software Pipelining", false, false)
168 
169 /// The "main" function for implementing Swing Modulo Scheduling.
170 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
171  if (skipFunction(mf.getFunction()))
172  return false;
173 
174  if (!EnableSWP)
175  return false;
176 
177  if (mf.getFunction().getAttributes().hasAttribute(
178  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
179  !EnableSWPOptSize.getPosition())
180  return false;
181 
182  MF = &mf;
183  MLI = &getAnalysis<MachineLoopInfo>();
184  MDT = &getAnalysis<MachineDominatorTree>();
185  TII = MF->getSubtarget().getInstrInfo();
186  RegClassInfo.runOnMachineFunction(*MF);
187 
188  for (auto &L : *MLI)
189  scheduleLoop(*L);
190 
191  return false;
192 }
193 
194 /// Attempt to perform the SMS algorithm on the specified loop. This function is
195 /// the main entry point for the algorithm. The function identifies candidate
196 /// loops, calculates the minimum initiation interval, and attempts to schedule
197 /// the loop.
198 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
199  bool Changed = false;
200  for (auto &InnerLoop : L)
201  Changed |= scheduleLoop(*InnerLoop);
202 
203 #ifndef NDEBUG
204  // Stop trying after reaching the limit (if any).
205  int Limit = SwpLoopLimit;
206  if (Limit >= 0) {
207  if (NumTries >= SwpLoopLimit)
208  return Changed;
209  NumTries++;
210  }
211 #endif
212 
213  setPragmaPipelineOptions(L);
214  if (!canPipelineLoop(L)) {
215  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
216  return Changed;
217  }
218 
219  ++NumTrytoPipeline;
220 
221  Changed = swingModuloScheduler(L);
222 
223  return Changed;
224 }
225 
226 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
227  MachineBasicBlock *LBLK = L.getTopBlock();
228 
229  if (LBLK == nullptr)
230  return;
231 
232  const BasicBlock *BBLK = LBLK->getBasicBlock();
233  if (BBLK == nullptr)
234  return;
235 
236  const Instruction *TI = BBLK->getTerminator();
237  if (TI == nullptr)
238  return;
239 
240  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
241  if (LoopID == nullptr)
242  return;
243 
244  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
245  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
246 
247  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
248  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
249 
250  if (MD == nullptr)
251  continue;
252 
253  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
254 
255  if (S == nullptr)
256  continue;
257 
258  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
259  assert(MD->getNumOperands() == 2 &&
260  "Pipeline initiation interval hint metadata should have two operands.");
262  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
263  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
264  } else if (S->getString() == "llvm.loop.pipeline.disable") {
265  disabledByPragma = true;
266  }
267  }
268 }
269 
270 /// Return true if the loop can be software pipelined. The algorithm is
271 /// restricted to loops with a single basic block. Make sure that the
272 /// branch in the loop can be analyzed.
273 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
274  if (L.getNumBlocks() != 1)
275  return false;
276 
277  if (disabledByPragma)
278  return false;
279 
280  // Check if the branch can't be understood because we can't do pipelining
281  // if that's the case.
282  LI.TBB = nullptr;
283  LI.FBB = nullptr;
284  LI.BrCond.clear();
285  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
286  return false;
287 
288  LI.LoopInductionVar = nullptr;
289  LI.LoopCompare = nullptr;
291  return false;
292 
293  if (!L.getLoopPreheader())
294  return false;
295 
296  // Remove any subregisters from inputs to phi nodes.
297  preprocessPhiNodes(*L.getHeader());
298  return true;
299 }
300 
301 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
303  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
304 
305  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
306  MachineOperand &DefOp = PI.getOperand(0);
307  assert(DefOp.getSubReg() == 0);
308  auto *RC = MRI.getRegClass(DefOp.getReg());
309 
310  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
311  MachineOperand &RegOp = PI.getOperand(i);
312  if (RegOp.getSubReg() == 0)
313  continue;
314 
315  // If the operand uses a subregister, replace it with a new register
316  // without subregisters, and generate a copy to the new register.
317  unsigned NewReg = MRI.createVirtualRegister(RC);
318  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
320  const DebugLoc &DL = PredB.findDebugLoc(At);
321  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
322  .addReg(RegOp.getReg(), getRegState(RegOp),
323  RegOp.getSubReg());
324  Slots.insertMachineInstrInMaps(*Copy);
325  RegOp.setReg(NewReg);
326  RegOp.setSubReg(0);
327  }
328  }
329 }
330 
331 /// The SMS algorithm consists of the following main steps:
332 /// 1. Computation and analysis of the dependence graph.
333 /// 2. Ordering of the nodes (instructions).
334 /// 3. Attempt to Schedule the loop.
335 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
336  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
337 
338  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
340 
341  MachineBasicBlock *MBB = L.getHeader();
342  // The kernel should not include any terminator instructions. These
343  // will be added back later.
344  SMS.startBlock(MBB);
345 
346  // Compute the number of 'real' instructions in the basic block by
347  // ignoring terminators.
348  unsigned size = MBB->size();
350  E = MBB->instr_end();
351  I != E; ++I, --size)
352  ;
353 
354  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
355  SMS.schedule();
356  SMS.exitRegion();
357 
358  SMS.finishBlock();
359  return SMS.hasNewSchedule();
360 }
361 
362 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
363  if (II_setByPragma > 0)
364  MII = II_setByPragma;
365  else
366  MII = std::max(ResMII, RecMII);
367 }
368 
369 void SwingSchedulerDAG::setMAX_II() {
370  if (II_setByPragma > 0)
371  MAX_II = II_setByPragma;
372  else
373  MAX_II = MII + 10;
374 }
375 
376 /// We override the schedule function in ScheduleDAGInstrs to implement the
377 /// scheduling part of the Swing Modulo Scheduling algorithm.
379  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
380  buildSchedGraph(AA);
381  addLoopCarriedDependences(AA);
382  updatePhiDependences();
383  Topo.InitDAGTopologicalSorting();
384  changeDependences();
385  postprocessDAG();
386  LLVM_DEBUG(dump());
387 
388  NodeSetType NodeSets;
389  findCircuits(NodeSets);
390  NodeSetType Circuits = NodeSets;
391 
392  // Calculate the MII.
393  unsigned ResMII = calculateResMII();
394  unsigned RecMII = calculateRecMII(NodeSets);
395 
396  fuseRecs(NodeSets);
397 
398  // This flag is used for testing and can cause correctness problems.
399  if (SwpIgnoreRecMII)
400  RecMII = 0;
401 
402  setMII(ResMII, RecMII);
403  setMAX_II();
404 
405  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
406  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
407 
408  // Can't schedule a loop without a valid MII.
409  if (MII == 0)
410  return;
411 
412  // Don't pipeline large loops.
413  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
414  return;
415 
416  computeNodeFunctions(NodeSets);
417 
418  registerPressureFilter(NodeSets);
419 
420  colocateNodeSets(NodeSets);
421 
422  checkNodeSets(NodeSets);
423 
424  LLVM_DEBUG({
425  for (auto &I : NodeSets) {
426  dbgs() << " Rec NodeSet ";
427  I.dump();
428  }
429  });
430 
431  std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
432 
433  groupRemainingNodes(NodeSets);
434 
435  removeDuplicateNodes(NodeSets);
436 
437  LLVM_DEBUG({
438  for (auto &I : NodeSets) {
439  dbgs() << " NodeSet ";
440  I.dump();
441  }
442  });
443 
444  computeNodeOrder(NodeSets);
445 
446  // check for node order issues
447  checkValidNodeOrder(Circuits);
448 
449  SMSchedule Schedule(Pass.MF);
450  Scheduled = schedulePipeline(Schedule);
451 
452  if (!Scheduled)
453  return;
454 
455  unsigned numStages = Schedule.getMaxStageCount();
456  // No need to generate pipeline if there are no overlapped iterations.
457  if (numStages == 0)
458  return;
459 
460  // Check that the maximum stage count is less than user-defined limit.
461  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
462  return;
463 
464  generatePipelinedLoop(Schedule);
465  ++NumPipelined;
466 }
467 
468 /// Clean up after the software pipeliner runs.
470  for (MachineInstr *I : NewMIs)
472  NewMIs.clear();
473 
474  // Call the superclass.
476 }
477 
478 /// Return the register values for the operands of a Phi instruction.
479 /// This function assume the instruction is a Phi.
481  unsigned &InitVal, unsigned &LoopVal) {
482  assert(Phi.isPHI() && "Expecting a Phi.");
483 
484  InitVal = 0;
485  LoopVal = 0;
486  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
487  if (Phi.getOperand(i + 1).getMBB() != Loop)
488  InitVal = Phi.getOperand(i).getReg();
489  else
490  LoopVal = Phi.getOperand(i).getReg();
491 
492  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
493 }
494 
495 /// Return the Phi register value that comes from the incoming block.
496 static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
497  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
498  if (Phi.getOperand(i + 1).getMBB() != LoopBB)
499  return Phi.getOperand(i).getReg();
500  return 0;
501 }
502 
503 /// Return the Phi register value that comes the loop block.
504 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
505  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
506  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
507  return Phi.getOperand(i).getReg();
508  return 0;
509 }
510 
511 /// Return true if SUb can be reached from SUa following the chain edges.
512 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
513  SmallPtrSet<SUnit *, 8> Visited;
514  SmallVector<SUnit *, 8> Worklist;
515  Worklist.push_back(SUa);
516  while (!Worklist.empty()) {
517  const SUnit *SU = Worklist.pop_back_val();
518  for (auto &SI : SU->Succs) {
519  SUnit *SuccSU = SI.getSUnit();
520  if (SI.getKind() == SDep::Order) {
521  if (Visited.count(SuccSU))
522  continue;
523  if (SuccSU == SUb)
524  return true;
525  Worklist.push_back(SuccSU);
526  Visited.insert(SuccSU);
527  }
528  }
529  }
530  return false;
531 }
532 
533 /// Return true if the instruction causes a chain between memory
534 /// references before and after it.
536  return MI.isCall() || MI.hasUnmodeledSideEffects() ||
537  (MI.hasOrderedMemoryRef() &&
538  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
539 }
540 
541 /// Return the underlying objects for the memory references of an instruction.
542 /// This function calls the code in ValueTracking, but first checks that the
543 /// instruction has a memory operand.
546  const DataLayout &DL) {
547  if (!MI->hasOneMemOperand())
548  return;
550  if (!MM->getValue())
551  return;
552  GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
553  for (Value *V : Objs) {
554  if (!isIdentifiedObject(V)) {
555  Objs.clear();
556  return;
557  }
558  Objs.push_back(V);
559  }
560 }
561 
562 /// Add a chain edge between a load and store if the store can be an
563 /// alias of the load on a subsequent iteration, i.e., a loop carried
564 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
565 /// but that code doesn't create loop carried dependences.
566 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
568  Value *UnknownValue =
570  for (auto &SU : SUnits) {
571  MachineInstr &MI = *SU.getInstr();
572  if (isDependenceBarrier(MI, AA))
573  PendingLoads.clear();
574  else if (MI.mayLoad()) {
576  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
577  if (Objs.empty())
578  Objs.push_back(UnknownValue);
579  for (auto V : Objs) {
580  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
581  SUs.push_back(&SU);
582  }
583  } else if (MI.mayStore()) {
585  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
586  if (Objs.empty())
587  Objs.push_back(UnknownValue);
588  for (auto V : Objs) {
590  PendingLoads.find(V);
591  if (I == PendingLoads.end())
592  continue;
593  for (auto Load : I->second) {
594  if (isSuccOrder(Load, &SU))
595  continue;
596  MachineInstr &LdMI = *Load->getInstr();
597  // First, perform the cheaper check that compares the base register.
598  // If they are the same and the load offset is less than the store
599  // offset, then mark the dependence as loop carried potentially.
600  MachineOperand *BaseOp1, *BaseOp2;
601  int64_t Offset1, Offset2;
602  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
603  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
604  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
605  (int)Offset1 < (int)Offset2) {
606  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
607  "What happened to the chain edge?");
608  SDep Dep(Load, SDep::Barrier);
609  Dep.setLatency(1);
610  SU.addPred(Dep);
611  continue;
612  }
613  }
614  // Second, the more expensive check that uses alias analysis on the
615  // base registers. If they alias, and the load offset is less than
616  // the store offset, the mark the dependence as loop carried.
617  if (!AA) {
618  SDep Dep(Load, SDep::Barrier);
619  Dep.setLatency(1);
620  SU.addPred(Dep);
621  continue;
622  }
623  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
624  MachineMemOperand *MMO2 = *MI.memoperands_begin();
625  if (!MMO1->getValue() || !MMO2->getValue()) {
626  SDep Dep(Load, SDep::Barrier);
627  Dep.setLatency(1);
628  SU.addPred(Dep);
629  continue;
630  }
631  if (MMO1->getValue() == MMO2->getValue() &&
632  MMO1->getOffset() <= MMO2->getOffset()) {
633  SDep Dep(Load, SDep::Barrier);
634  Dep.setLatency(1);
635  SU.addPred(Dep);
636  continue;
637  }
638  AliasResult AAResult = AA->alias(
640  MMO1->getAAInfo()),
642  MMO2->getAAInfo()));
643 
644  if (AAResult != NoAlias) {
645  SDep Dep(Load, SDep::Barrier);
646  Dep.setLatency(1);
647  SU.addPred(Dep);
648  }
649  }
650  }
651  }
652  }
653 }
654 
655 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
656 /// processes dependences for PHIs. This function adds true dependences
657 /// from a PHI to a use, and a loop carried dependence from the use to the
658 /// PHI. The loop carried dependence is represented as an anti dependence
659 /// edge. This function also removes chain dependences between unrelated
660 /// PHIs.
661 void SwingSchedulerDAG::updatePhiDependences() {
662  SmallVector<SDep, 4> RemoveDeps;
664 
665  // Iterate over each DAG node.
666  for (SUnit &I : SUnits) {
667  RemoveDeps.clear();
668  // Set to true if the instruction has an operand defined by a Phi.
669  unsigned HasPhiUse = 0;
670  unsigned HasPhiDef = 0;
671  MachineInstr *MI = I.getInstr();
672  // Iterate over each operand, and we process the definitions.
674  MOE = MI->operands_end();
675  MOI != MOE; ++MOI) {
676  if (!MOI->isReg())
677  continue;
678  unsigned Reg = MOI->getReg();
679  if (MOI->isDef()) {
680  // If the register is used by a Phi, then create an anti dependence.
682  UI = MRI.use_instr_begin(Reg),
683  UE = MRI.use_instr_end();
684  UI != UE; ++UI) {
685  MachineInstr *UseMI = &*UI;
686  SUnit *SU = getSUnit(UseMI);
687  if (SU != nullptr && UseMI->isPHI()) {
688  if (!MI->isPHI()) {
689  SDep Dep(SU, SDep::Anti, Reg);
690  Dep.setLatency(1);
691  I.addPred(Dep);
692  } else {
693  HasPhiDef = Reg;
694  // Add a chain edge to a dependent Phi that isn't an existing
695  // predecessor.
696  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
697  I.addPred(SDep(SU, SDep::Barrier));
698  }
699  }
700  }
701  } else if (MOI->isUse()) {
702  // If the register is defined by a Phi, then create a true dependence.
703  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
704  if (DefMI == nullptr)
705  continue;
706  SUnit *SU = getSUnit(DefMI);
707  if (SU != nullptr && DefMI->isPHI()) {
708  if (!MI->isPHI()) {
709  SDep Dep(SU, SDep::Data, Reg);
710  Dep.setLatency(0);
711  ST.adjustSchedDependency(SU, &I, Dep);
712  I.addPred(Dep);
713  } else {
714  HasPhiUse = Reg;
715  // Add a chain edge to a dependent Phi that isn't an existing
716  // predecessor.
717  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
718  I.addPred(SDep(SU, SDep::Barrier));
719  }
720  }
721  }
722  }
723  // Remove order dependences from an unrelated Phi.
724  if (!SwpPruneDeps)
725  continue;
726  for (auto &PI : I.Preds) {
727  MachineInstr *PMI = PI.getSUnit()->getInstr();
728  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
729  if (I.getInstr()->isPHI()) {
730  if (PMI->getOperand(0).getReg() == HasPhiUse)
731  continue;
732  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
733  continue;
734  }
735  RemoveDeps.push_back(PI);
736  }
737  }
738  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
739  I.removePred(RemoveDeps[i]);
740  }
741 }
742 
743 /// Iterate over each DAG node and see if we can change any dependences
744 /// in order to reduce the recurrence MII.
745 void SwingSchedulerDAG::changeDependences() {
746  // See if an instruction can use a value from the previous iteration.
747  // If so, we update the base and offset of the instruction and change
748  // the dependences.
749  for (SUnit &I : SUnits) {
750  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
751  int64_t NewOffset = 0;
752  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
753  NewOffset))
754  continue;
755 
756  // Get the MI and SUnit for the instruction that defines the original base.
757  unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
758  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
759  if (!DefMI)
760  continue;
761  SUnit *DefSU = getSUnit(DefMI);
762  if (!DefSU)
763  continue;
764  // Get the MI and SUnit for the instruction that defins the new base.
765  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
766  if (!LastMI)
767  continue;
768  SUnit *LastSU = getSUnit(LastMI);
769  if (!LastSU)
770  continue;
771 
772  if (Topo.IsReachable(&I, LastSU))
773  continue;
774 
775  // Remove the dependence. The value now depends on a prior iteration.
777  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
778  ++P)
779  if (P->getSUnit() == DefSU)
780  Deps.push_back(*P);
781  for (int i = 0, e = Deps.size(); i != e; i++) {
782  Topo.RemovePred(&I, Deps[i].getSUnit());
783  I.removePred(Deps[i]);
784  }
785  // Remove the chain dependence between the instructions.
786  Deps.clear();
787  for (auto &P : LastSU->Preds)
788  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
789  Deps.push_back(P);
790  for (int i = 0, e = Deps.size(); i != e; i++) {
791  Topo.RemovePred(LastSU, Deps[i].getSUnit());
792  LastSU->removePred(Deps[i]);
793  }
794 
795  // Add a dependence between the new instruction and the instruction
796  // that defines the new base.
797  SDep Dep(&I, SDep::Anti, NewBase);
798  Topo.AddPred(LastSU, &I);
799  LastSU->addPred(Dep);
800 
801  // Remember the base and offset information so that we can update the
802  // instruction during code generation.
803  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
804  }
805 }
806 
807 namespace {
808 
809 // FuncUnitSorter - Comparison operator used to sort instructions by
810 // the number of functional unit choices.
811 struct FuncUnitSorter {
814 
815  FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
816 
817  // Compute the number of functional unit alternatives needed
818  // at each stage, and take the minimum value. We prioritize the
819  // instructions by the least number of choices first.
820  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
821  unsigned schedClass = Inst->getDesc().getSchedClass();
822  unsigned min = UINT_MAX;
823  for (const InstrStage *IS = InstrItins->beginStage(schedClass),
824  *IE = InstrItins->endStage(schedClass);
825  IS != IE; ++IS) {
826  unsigned funcUnits = IS->getUnits();
827  unsigned numAlternatives = countPopulation(funcUnits);
828  if (numAlternatives < min) {
829  min = numAlternatives;
830  F = funcUnits;
831  }
832  }
833  return min;
834  }
835 
836  // Compute the critical resources needed by the instruction. This
837  // function records the functional units needed by instructions that
838  // must use only one functional unit. We use this as a tie breaker
839  // for computing the resource MII. The instrutions that require
840  // the same, highly used, functional unit have high priority.
841  void calcCriticalResources(MachineInstr &MI) {
842  unsigned SchedClass = MI.getDesc().getSchedClass();
843  for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
844  *IE = InstrItins->endStage(SchedClass);
845  IS != IE; ++IS) {
846  unsigned FuncUnits = IS->getUnits();
847  if (countPopulation(FuncUnits) == 1)
848  Resources[FuncUnits]++;
849  }
850  }
851 
852  /// Return true if IS1 has less priority than IS2.
853  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
854  unsigned F1 = 0, F2 = 0;
855  unsigned MFUs1 = minFuncUnits(IS1, F1);
856  unsigned MFUs2 = minFuncUnits(IS2, F2);
857  if (MFUs1 == 1 && MFUs2 == 1)
858  return Resources.lookup(F1) < Resources.lookup(F2);
859  return MFUs1 > MFUs2;
860  }
861 };
862 
863 } // end anonymous namespace
864 
865 /// Calculate the resource constrained minimum initiation interval for the
866 /// specified loop. We use the DFA to model the resources needed for
867 /// each instruction, and we ignore dependences. A different DFA is created
868 /// for each cycle that is required. When adding a new instruction, we attempt
869 /// to add it to each existing DFA, until a legal space is found. If the
870 /// instruction cannot be reserved in an existing DFA, we create a new one.
871 unsigned SwingSchedulerDAG::calculateResMII() {
875 
876  // Sort the instructions by the number of available choices for scheduling,
877  // least to most. Use the number of critical resources as the tie breaker.
878  FuncUnitSorter FUS =
879  FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
881  E = MBB->getFirstTerminator();
882  I != E; ++I)
883  FUS.calcCriticalResources(*I);
885  FuncUnitOrder(FUS);
886 
888  E = MBB->getFirstTerminator();
889  I != E; ++I)
890  FuncUnitOrder.push(&*I);
891 
892  while (!FuncUnitOrder.empty()) {
893  MachineInstr *MI = FuncUnitOrder.top();
894  FuncUnitOrder.pop();
895  if (TII->isZeroCost(MI->getOpcode()))
896  continue;
897  // Attempt to reserve the instruction in an existing DFA. At least one
898  // DFA is needed for each cycle.
899  unsigned NumCycles = getSUnit(MI)->Latency;
900  unsigned ReservedCycles = 0;
903  for (unsigned C = 0; C < NumCycles; ++C)
904  while (RI != RE) {
905  if ((*RI++)->canReserveResources(*MI)) {
906  ++ReservedCycles;
907  break;
908  }
909  }
910  // Start reserving resources using existing DFAs.
911  for (unsigned C = 0; C < ReservedCycles; ++C) {
912  --RI;
913  (*RI)->reserveResources(*MI);
914  }
915  // Add new DFAs, if needed, to reserve resources.
916  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
917  DFAPacketizer *NewResource =
919  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
920  NewResource->reserveResources(*MI);
921  Resources.push_back(NewResource);
922  }
923  }
924  int Resmii = Resources.size();
925  // Delete the memory for each of the DFAs that were created earlier.
926  for (DFAPacketizer *RI : Resources) {
927  DFAPacketizer *D = RI;
928  delete D;
929  }
930  Resources.clear();
931  return Resmii;
932 }
933 
934 /// Calculate the recurrence-constrainted minimum initiation interval.
935 /// Iterate over each circuit. Compute the delay(c) and distance(c)
936 /// for each circuit. The II needs to satisfy the inequality
937 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
938 /// II that satisfies the inequality, and the RecMII is the maximum
939 /// of those values.
940 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
941  unsigned RecMII = 0;
942 
943  for (NodeSet &Nodes : NodeSets) {
944  if (Nodes.empty())
945  continue;
946 
947  unsigned Delay = Nodes.getLatency();
948  unsigned Distance = 1;
949 
950  // ii = ceil(delay / distance)
951  unsigned CurMII = (Delay + Distance - 1) / Distance;
952  Nodes.setRecMII(CurMII);
953  if (CurMII > RecMII)
954  RecMII = CurMII;
955  }
956 
957  return RecMII;
958 }
959 
960 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
961 /// but we do this to find the circuits, and then change them back.
962 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
964  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
965  SUnit *SU = &SUnits[i];
966  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
967  IP != EP; ++IP) {
968  if (IP->getKind() != SDep::Anti)
969  continue;
970  DepsAdded.push_back(std::make_pair(SU, *IP));
971  }
972  }
973  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
974  E = DepsAdded.end();
975  I != E; ++I) {
976  // Remove this anti dependency and add one in the reverse direction.
977  SUnit *SU = I->first;
978  SDep &D = I->second;
979  SUnit *TargetSU = D.getSUnit();
980  unsigned Reg = D.getReg();
981  unsigned Lat = D.getLatency();
982  SU->removePred(D);
983  SDep Dep(SU, SDep::Anti, Reg);
984  Dep.setLatency(Lat);
985  TargetSU->addPred(Dep);
986  }
987 }
988 
989 /// Create the adjacency structure of the nodes in the graph.
990 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
991  SwingSchedulerDAG *DAG) {
992  BitVector Added(SUnits.size());
993  DenseMap<int, int> OutputDeps;
994  for (int i = 0, e = SUnits.size(); i != e; ++i) {
995  Added.reset();
996  // Add any successor to the adjacency matrix and exclude duplicates.
997  for (auto &SI : SUnits[i].Succs) {
998  // Only create a back-edge on the first and last nodes of a dependence
999  // chain. This records any chains and adds them later.
1000  if (SI.getKind() == SDep::Output) {
1001  int N = SI.getSUnit()->NodeNum;
1002  int BackEdge = i;
1003  auto Dep = OutputDeps.find(BackEdge);
1004  if (Dep != OutputDeps.end()) {
1005  BackEdge = Dep->second;
1006  OutputDeps.erase(Dep);
1007  }
1008  OutputDeps[N] = BackEdge;
1009  }
1010  // Do not process a boundary node, an artificial node.
1011  // A back-edge is processed only if it goes to a Phi.
1012  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1013  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1014  continue;
1015  int N = SI.getSUnit()->NodeNum;
1016  if (!Added.test(N)) {
1017  AdjK[i].push_back(N);
1018  Added.set(N);
1019  }
1020  }
1021  // A chain edge between a store and a load is treated as a back-edge in the
1022  // adjacency matrix.
1023  for (auto &PI : SUnits[i].Preds) {
1024  if (!SUnits[i].getInstr()->mayStore() ||
1025  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1026  continue;
1027  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1028  int N = PI.getSUnit()->NodeNum;
1029  if (!Added.test(N)) {
1030  AdjK[i].push_back(N);
1031  Added.set(N);
1032  }
1033  }
1034  }
1035  }
1036  // Add back-edges in the adjacency matrix for the output dependences.
1037  for (auto &OD : OutputDeps)
1038  if (!Added.test(OD.second)) {
1039  AdjK[OD.first].push_back(OD.second);
1040  Added.set(OD.second);
1041  }
1042 }
1043 
1044 /// Identify an elementary circuit in the dependence graph starting at the
1045 /// specified node.
1046 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1047  bool HasBackedge) {
1048  SUnit *SV = &SUnits[V];
1049  bool F = false;
1050  Stack.insert(SV);
1051  Blocked.set(V);
1052 
1053  for (auto W : AdjK[V]) {
1054  if (NumPaths > MaxPaths)
1055  break;
1056  if (W < S)
1057  continue;
1058  if (W == S) {
1059  if (!HasBackedge)
1060  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1061  F = true;
1062  ++NumPaths;
1063  break;
1064  } else if (!Blocked.test(W)) {
1065  if (circuit(W, S, NodeSets,
1066  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1067  F = true;
1068  }
1069  }
1070 
1071  if (F)
1072  unblock(V);
1073  else {
1074  for (auto W : AdjK[V]) {
1075  if (W < S)
1076  continue;
1077  if (B[W].count(SV) == 0)
1078  B[W].insert(SV);
1079  }
1080  }
1081  Stack.pop_back();
1082  return F;
1083 }
1084 
1085 /// Unblock a node in the circuit finding algorithm.
1086 void SwingSchedulerDAG::Circuits::unblock(int U) {
1087  Blocked.reset(U);
1088  SmallPtrSet<SUnit *, 4> &BU = B[U];
1089  while (!BU.empty()) {
1091  assert(SI != BU.end() && "Invalid B set.");
1092  SUnit *W = *SI;
1093  BU.erase(W);
1094  if (Blocked.test(W->NodeNum))
1095  unblock(W->NodeNum);
1096  }
1097 }
1098 
1099 /// Identify all the elementary circuits in the dependence graph using
1100 /// Johnson's circuit algorithm.
1101 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1102  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1103  // but we do this to find the circuits, and then change them back.
1104  swapAntiDependences(SUnits);
1105 
1106  Circuits Cir(SUnits, Topo);
1107  // Create the adjacency structure.
1108  Cir.createAdjacencyStructure(this);
1109  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1110  Cir.reset();
1111  Cir.circuit(i, i, NodeSets);
1112  }
1113 
1114  // Change the dependences back so that we've created a DAG again.
1115  swapAntiDependences(SUnits);
1116 }
1117 
1118 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1119 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1120 // additional copies that are needed across iterations. An artificial dependence
1121 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1122 
1123 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1124 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1125 // PHI-------True-Dep------> USEOfPhi
1126 
1127 // The mutation creates
1128 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1129 
1130 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1131 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1132 // late to avoid additional copies across iterations. The possible scheduling
1133 // order would be
1134 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1135 
1137  for (SUnit &SU : DAG->SUnits) {
1138  // Find the COPY/REG_SEQUENCE instruction.
1139  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1140  continue;
1141 
1142  // Record the loop carried PHIs.
1143  SmallVector<SUnit *, 4> PHISUs;
1144  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1145  SmallVector<SUnit *, 4> SrcSUs;
1146 
1147  for (auto &Dep : SU.Preds) {
1148  SUnit *TmpSU = Dep.getSUnit();
1149  MachineInstr *TmpMI = TmpSU->getInstr();
1150  SDep::Kind DepKind = Dep.getKind();
1151  // Save the loop carried PHI.
1152  if (DepKind == SDep::Anti && TmpMI->isPHI())
1153  PHISUs.push_back(TmpSU);
1154  // Save the source of COPY/REG_SEQUENCE.
1155  // If the source has no pre-decessors, we will end up creating cycles.
1156  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1157  SrcSUs.push_back(TmpSU);
1158  }
1159 
1160  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1161  continue;
1162 
1163  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1164  // SUnit to the container.
1165  SmallVector<SUnit *, 8> UseSUs;
1166  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1167  for (auto &Dep : (*I)->Succs) {
1168  if (Dep.getKind() != SDep::Data)
1169  continue;
1170 
1171  SUnit *TmpSU = Dep.getSUnit();
1172  MachineInstr *TmpMI = TmpSU->getInstr();
1173  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1174  PHISUs.push_back(TmpSU);
1175  continue;
1176  }
1177  UseSUs.push_back(TmpSU);
1178  }
1179  }
1180 
1181  if (UseSUs.size() == 0)
1182  continue;
1183 
1184  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1185  // Add the artificial dependencies if it does not form a cycle.
1186  for (auto I : UseSUs) {
1187  for (auto Src : SrcSUs) {
1188  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1189  Src->addPred(SDep(I, SDep::Artificial));
1190  SDAG->Topo.AddPred(Src, I);
1191  }
1192  }
1193  }
1194  }
1195 }
1196 
1197 /// Return true for DAG nodes that we ignore when computing the cost functions.
1198 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1199 /// in the calculation of the ASAP, ALAP, etc functions.
1200 static bool ignoreDependence(const SDep &D, bool isPred) {
1201  if (D.isArtificial())
1202  return true;
1203  return D.getKind() == SDep::Anti && isPred;
1204 }
1205 
1206 /// Compute several functions need to order the nodes for scheduling.
1207 /// ASAP - Earliest time to schedule a node.
1208 /// ALAP - Latest time to schedule a node.
1209 /// MOV - Mobility function, difference between ALAP and ASAP.
1210 /// D - Depth of each node.
1211 /// H - Height of each node.
1212 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1213  ScheduleInfo.resize(SUnits.size());
1214 
1215  LLVM_DEBUG({
1216  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1217  E = Topo.end();
1218  I != E; ++I) {
1219  const SUnit &SU = SUnits[*I];
1220  dumpNode(SU);
1221  }
1222  });
1223 
1224  int maxASAP = 0;
1225  // Compute ASAP and ZeroLatencyDepth.
1226  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1227  E = Topo.end();
1228  I != E; ++I) {
1229  int asap = 0;
1230  int zeroLatencyDepth = 0;
1231  SUnit *SU = &SUnits[*I];
1232  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1233  EP = SU->Preds.end();
1234  IP != EP; ++IP) {
1235  SUnit *pred = IP->getSUnit();
1236  if (IP->getLatency() == 0)
1237  zeroLatencyDepth =
1238  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1239  if (ignoreDependence(*IP, true))
1240  continue;
1241  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1242  getDistance(pred, SU, *IP) * MII));
1243  }
1244  maxASAP = std::max(maxASAP, asap);
1245  ScheduleInfo[*I].ASAP = asap;
1246  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1247  }
1248 
1249  // Compute ALAP, ZeroLatencyHeight, and MOV.
1251  E = Topo.rend();
1252  I != E; ++I) {
1253  int alap = maxASAP;
1254  int zeroLatencyHeight = 0;
1255  SUnit *SU = &SUnits[*I];
1256  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1257  ES = SU->Succs.end();
1258  IS != ES; ++IS) {
1259  SUnit *succ = IS->getSUnit();
1260  if (IS->getLatency() == 0)
1261  zeroLatencyHeight =
1262  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1263  if (ignoreDependence(*IS, true))
1264  continue;
1265  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1266  getDistance(SU, succ, *IS) * MII));
1267  }
1268 
1269  ScheduleInfo[*I].ALAP = alap;
1270  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1271  }
1272 
1273  // After computing the node functions, compute the summary for each node set.
1274  for (NodeSet &I : NodeSets)
1275  I.computeNodeSetInfo(this);
1276 
1277  LLVM_DEBUG({
1278  for (unsigned i = 0; i < SUnits.size(); i++) {
1279  dbgs() << "\tNode " << i << ":\n";
1280  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1281  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1282  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1283  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1284  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1285  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1286  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1287  }
1288  });
1289 }
1290 
1291 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1292 /// as the predecessors of the elements of NodeOrder that are not also in
1293 /// NodeOrder.
1296  const NodeSet *S = nullptr) {
1297  Preds.clear();
1298  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1299  I != E; ++I) {
1300  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1301  PI != PE; ++PI) {
1302  if (S && S->count(PI->getSUnit()) == 0)
1303  continue;
1304  if (ignoreDependence(*PI, true))
1305  continue;
1306  if (NodeOrder.count(PI->getSUnit()) == 0)
1307  Preds.insert(PI->getSUnit());
1308  }
1309  // Back-edges are predecessors with an anti-dependence.
1310  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1311  ES = (*I)->Succs.end();
1312  IS != ES; ++IS) {
1313  if (IS->getKind() != SDep::Anti)
1314  continue;
1315  if (S && S->count(IS->getSUnit()) == 0)
1316  continue;
1317  if (NodeOrder.count(IS->getSUnit()) == 0)
1318  Preds.insert(IS->getSUnit());
1319  }
1320  }
1321  return !Preds.empty();
1322 }
1323 
1324 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1325 /// as the successors of the elements of NodeOrder that are not also in
1326 /// NodeOrder.
1329  const NodeSet *S = nullptr) {
1330  Succs.clear();
1331  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1332  I != E; ++I) {
1333  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1334  SI != SE; ++SI) {
1335  if (S && S->count(SI->getSUnit()) == 0)
1336  continue;
1337  if (ignoreDependence(*SI, false))
1338  continue;
1339  if (NodeOrder.count(SI->getSUnit()) == 0)
1340  Succs.insert(SI->getSUnit());
1341  }
1342  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1343  PE = (*I)->Preds.end();
1344  PI != PE; ++PI) {
1345  if (PI->getKind() != SDep::Anti)
1346  continue;
1347  if (S && S->count(PI->getSUnit()) == 0)
1348  continue;
1349  if (NodeOrder.count(PI->getSUnit()) == 0)
1350  Succs.insert(PI->getSUnit());
1351  }
1352  }
1353  return !Succs.empty();
1354 }
1355 
1356 /// Return true if there is a path from the specified node to any of the nodes
1357 /// in DestNodes. Keep track and return the nodes in any path.
1358 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1359  SetVector<SUnit *> &DestNodes,
1360  SetVector<SUnit *> &Exclude,
1361  SmallPtrSet<SUnit *, 8> &Visited) {
1362  if (Cur->isBoundaryNode())
1363  return false;
1364  if (Exclude.count(Cur) != 0)
1365  return false;
1366  if (DestNodes.count(Cur) != 0)
1367  return true;
1368  if (!Visited.insert(Cur).second)
1369  return Path.count(Cur) != 0;
1370  bool FoundPath = false;
1371  for (auto &SI : Cur->Succs)
1372  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1373  for (auto &PI : Cur->Preds)
1374  if (PI.getKind() == SDep::Anti)
1375  FoundPath |=
1376  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1377  if (FoundPath)
1378  Path.insert(Cur);
1379  return FoundPath;
1380 }
1381 
1382 /// Return true if Set1 is a subset of Set2.
1383 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1384  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1385  if (Set2.count(*I) == 0)
1386  return false;
1387  return true;
1388 }
1389 
1390 /// Compute the live-out registers for the instructions in a node-set.
1391 /// The live-out registers are those that are defined in the node-set,
1392 /// but not used. Except for use operands of Phis.
1394  NodeSet &NS) {
1398  SmallSet<unsigned, 4> Uses;
1399  for (SUnit *SU : NS) {
1400  const MachineInstr *MI = SU->getInstr();
1401  if (MI->isPHI())
1402  continue;
1403  for (const MachineOperand &MO : MI->operands())
1404  if (MO.isReg() && MO.isUse()) {
1405  unsigned Reg = MO.getReg();
1407  Uses.insert(Reg);
1408  else if (MRI.isAllocatable(Reg))
1409  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1410  Uses.insert(*Units);
1411  }
1412  }
1413  for (SUnit *SU : NS)
1414  for (const MachineOperand &MO : SU->getInstr()->operands())
1415  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1416  unsigned Reg = MO.getReg();
1418  if (!Uses.count(Reg))
1419  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1421  } else if (MRI.isAllocatable(Reg)) {
1422  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1423  if (!Uses.count(*Units))
1424  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1426  }
1427  }
1428  RPTracker.addLiveRegs(LiveOutRegs);
1429 }
1430 
1431 /// A heuristic to filter nodes in recurrent node-sets if the register
1432 /// pressure of a set is too high.
1433 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1434  for (auto &NS : NodeSets) {
1435  // Skip small node-sets since they won't cause register pressure problems.
1436  if (NS.size() <= 2)
1437  continue;
1438  IntervalPressure RecRegPressure;
1439  RegPressureTracker RecRPTracker(RecRegPressure);
1440  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1441  computeLiveOuts(MF, RecRPTracker, NS);
1442  RecRPTracker.closeBottom();
1443 
1444  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1445  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1446  return A->NodeNum > B->NodeNum;
1447  });
1448 
1449  for (auto &SU : SUnits) {
1450  // Since we're computing the register pressure for a subset of the
1451  // instructions in a block, we need to set the tracker for each
1452  // instruction in the node-set. The tracker is set to the instruction
1453  // just after the one we're interested in.
1454  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1455  RecRPTracker.setPos(std::next(CurInstI));
1456 
1457  RegPressureDelta RPDelta;
1458  ArrayRef<PressureChange> CriticalPSets;
1459  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1460  CriticalPSets,
1461  RecRegPressure.MaxSetPressure);
1462  if (RPDelta.Excess.isValid()) {
1463  LLVM_DEBUG(
1464  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1465  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1466  << ":" << RPDelta.Excess.getUnitInc());
1467  NS.setExceedPressure(SU);
1468  break;
1469  }
1470  RecRPTracker.recede();
1471  }
1472  }
1473 }
1474 
1475 /// A heuristic to colocate node sets that have the same set of
1476 /// successors.
1477 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1478  unsigned Colocate = 0;
1479  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1480  NodeSet &N1 = NodeSets[i];
1482  if (N1.empty() || !succ_L(N1, S1))
1483  continue;
1484  for (int j = i + 1; j < e; ++j) {
1485  NodeSet &N2 = NodeSets[j];
1486  if (N1.compareRecMII(N2) != 0)
1487  continue;
1489  if (N2.empty() || !succ_L(N2, S2))
1490  continue;
1491  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1492  N1.setColocate(++Colocate);
1493  N2.setColocate(Colocate);
1494  break;
1495  }
1496  }
1497  }
1498 }
1499 
1500 /// Check if the existing node-sets are profitable. If not, then ignore the
1501 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1502 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1503 /// then it's best to try to schedule all instructions together instead of
1504 /// starting with the recurrent node-sets.
1505 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1506  // Look for loops with a large MII.
1507  if (MII < 17)
1508  return;
1509  // Check if the node-set contains only a simple add recurrence.
1510  for (auto &NS : NodeSets) {
1511  if (NS.getRecMII() > 2)
1512  return;
1513  if (NS.getMaxDepth() > MII)
1514  return;
1515  }
1516  NodeSets.clear();
1517  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1518  return;
1519 }
1520 
1521 /// Add the nodes that do not belong to a recurrence set into groups
1522 /// based upon connected componenets.
1523 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1524  SetVector<SUnit *> NodesAdded;
1525  SmallPtrSet<SUnit *, 8> Visited;
1526  // Add the nodes that are on a path between the previous node sets and
1527  // the current node set.
1528  for (NodeSet &I : NodeSets) {
1530  // Add the nodes from the current node set to the previous node set.
1531  if (succ_L(I, N)) {
1532  SetVector<SUnit *> Path;
1533  for (SUnit *NI : N) {
1534  Visited.clear();
1535  computePath(NI, Path, NodesAdded, I, Visited);
1536  }
1537  if (!Path.empty())
1538  I.insert(Path.begin(), Path.end());
1539  }
1540  // Add the nodes from the previous node set to the current node set.
1541  N.clear();
1542  if (succ_L(NodesAdded, N)) {
1543  SetVector<SUnit *> Path;
1544  for (SUnit *NI : N) {
1545  Visited.clear();
1546  computePath(NI, Path, I, NodesAdded, Visited);
1547  }
1548  if (!Path.empty())
1549  I.insert(Path.begin(), Path.end());
1550  }
1551  NodesAdded.insert(I.begin(), I.end());
1552  }
1553 
1554  // Create a new node set with the connected nodes of any successor of a node
1555  // in a recurrent set.
1556  NodeSet NewSet;
1558  if (succ_L(NodesAdded, N))
1559  for (SUnit *I : N)
1560  addConnectedNodes(I, NewSet, NodesAdded);
1561  if (!NewSet.empty())
1562  NodeSets.push_back(NewSet);
1563 
1564  // Create a new node set with the connected nodes of any predecessor of a node
1565  // in a recurrent set.
1566  NewSet.clear();
1567  if (pred_L(NodesAdded, N))
1568  for (SUnit *I : N)
1569  addConnectedNodes(I, NewSet, NodesAdded);
1570  if (!NewSet.empty())
1571  NodeSets.push_back(NewSet);
1572 
1573  // Create new nodes sets with the connected nodes any remaining node that
1574  // has no predecessor.
1575  for (unsigned i = 0; i < SUnits.size(); ++i) {
1576  SUnit *SU = &SUnits[i];
1577  if (NodesAdded.count(SU) == 0) {
1578  NewSet.clear();
1579  addConnectedNodes(SU, NewSet, NodesAdded);
1580  if (!NewSet.empty())
1581  NodeSets.push_back(NewSet);
1582  }
1583  }
1584 }
1585 
1586 /// Add the node to the set, and add all of its connected nodes to the set.
1587 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1588  SetVector<SUnit *> &NodesAdded) {
1589  NewSet.insert(SU);
1590  NodesAdded.insert(SU);
1591  for (auto &SI : SU->Succs) {
1592  SUnit *Successor = SI.getSUnit();
1593  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1594  addConnectedNodes(Successor, NewSet, NodesAdded);
1595  }
1596  for (auto &PI : SU->Preds) {
1597  SUnit *Predecessor = PI.getSUnit();
1598  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1599  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1600  }
1601 }
1602 
1603 /// Return true if Set1 contains elements in Set2. The elements in common
1604 /// are returned in a different container.
1605 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1606  SmallSetVector<SUnit *, 8> &Result) {
1607  Result.clear();
1608  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1609  SUnit *SU = Set1[i];
1610  if (Set2.count(SU) != 0)
1611  Result.insert(SU);
1612  }
1613  return !Result.empty();
1614 }
1615 
1616 /// Merge the recurrence node sets that have the same initial node.
1617 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1618  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1619  ++I) {
1620  NodeSet &NI = *I;
1621  for (NodeSetType::iterator J = I + 1; J != E;) {
1622  NodeSet &NJ = *J;
1623  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1624  if (NJ.compareRecMII(NI) > 0)
1625  NI.setRecMII(NJ.getRecMII());
1626  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1627  ++NII)
1628  I->insert(*NII);
1629  NodeSets.erase(J);
1630  E = NodeSets.end();
1631  } else {
1632  ++J;
1633  }
1634  }
1635  }
1636 }
1637 
1638 /// Remove nodes that have been scheduled in previous NodeSets.
1639 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1640  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1641  ++I)
1642  for (NodeSetType::iterator J = I + 1; J != E;) {
1643  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1644 
1645  if (J->empty()) {
1646  NodeSets.erase(J);
1647  E = NodeSets.end();
1648  } else {
1649  ++J;
1650  }
1651  }
1652 }
1653 
1654 /// Compute an ordered list of the dependence graph nodes, which
1655 /// indicates the order that the nodes will be scheduled. This is a
1656 /// two-level algorithm. First, a partial order is created, which
1657 /// consists of a list of sets ordered from highest to lowest priority.
1658 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1660  NodeOrder.clear();
1661 
1662  for (auto &Nodes : NodeSets) {
1663  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1664  OrderKind Order;
1666  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1667  R.insert(N.begin(), N.end());
1668  Order = BottomUp;
1669  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1670  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1671  R.insert(N.begin(), N.end());
1672  Order = TopDown;
1673  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1674  } else if (isIntersect(N, Nodes, R)) {
1675  // If some of the successors are in the existing node-set, then use the
1676  // top-down ordering.
1677  Order = TopDown;
1678  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1679  } else if (NodeSets.size() == 1) {
1680  for (auto &N : Nodes)
1681  if (N->Succs.size() == 0)
1682  R.insert(N);
1683  Order = BottomUp;
1684  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1685  } else {
1686  // Find the node with the highest ASAP.
1687  SUnit *maxASAP = nullptr;
1688  for (SUnit *SU : Nodes) {
1689  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1690  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1691  maxASAP = SU;
1692  }
1693  R.insert(maxASAP);
1694  Order = BottomUp;
1695  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1696  }
1697 
1698  while (!R.empty()) {
1699  if (Order == TopDown) {
1700  // Choose the node with the maximum height. If more than one, choose
1701  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1702  // choose the node with the lowest MOV.
1703  while (!R.empty()) {
1704  SUnit *maxHeight = nullptr;
1705  for (SUnit *I : R) {
1706  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1707  maxHeight = I;
1708  else if (getHeight(I) == getHeight(maxHeight) &&
1709  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1710  maxHeight = I;
1711  else if (getHeight(I) == getHeight(maxHeight) &&
1712  getZeroLatencyHeight(I) ==
1713  getZeroLatencyHeight(maxHeight) &&
1714  getMOV(I) < getMOV(maxHeight))
1715  maxHeight = I;
1716  }
1717  NodeOrder.insert(maxHeight);
1718  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1719  R.remove(maxHeight);
1720  for (const auto &I : maxHeight->Succs) {
1721  if (Nodes.count(I.getSUnit()) == 0)
1722  continue;
1723  if (NodeOrder.count(I.getSUnit()) != 0)
1724  continue;
1725  if (ignoreDependence(I, false))
1726  continue;
1727  R.insert(I.getSUnit());
1728  }
1729  // Back-edges are predecessors with an anti-dependence.
1730  for (const auto &I : maxHeight->Preds) {
1731  if (I.getKind() != SDep::Anti)
1732  continue;
1733  if (Nodes.count(I.getSUnit()) == 0)
1734  continue;
1735  if (NodeOrder.count(I.getSUnit()) != 0)
1736  continue;
1737  R.insert(I.getSUnit());
1738  }
1739  }
1740  Order = BottomUp;
1741  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1743  if (pred_L(NodeOrder, N, &Nodes))
1744  R.insert(N.begin(), N.end());
1745  } else {
1746  // Choose the node with the maximum depth. If more than one, choose
1747  // the node with the maximum ZeroLatencyDepth. If still more than one,
1748  // choose the node with the lowest MOV.
1749  while (!R.empty()) {
1750  SUnit *maxDepth = nullptr;
1751  for (SUnit *I : R) {
1752  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1753  maxDepth = I;
1754  else if (getDepth(I) == getDepth(maxDepth) &&
1755  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1756  maxDepth = I;
1757  else if (getDepth(I) == getDepth(maxDepth) &&
1758  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1759  getMOV(I) < getMOV(maxDepth))
1760  maxDepth = I;
1761  }
1762  NodeOrder.insert(maxDepth);
1763  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1764  R.remove(maxDepth);
1765  if (Nodes.isExceedSU(maxDepth)) {
1766  Order = TopDown;
1767  R.clear();
1768  R.insert(Nodes.getNode(0));
1769  break;
1770  }
1771  for (const auto &I : maxDepth->Preds) {
1772  if (Nodes.count(I.getSUnit()) == 0)
1773  continue;
1774  if (NodeOrder.count(I.getSUnit()) != 0)
1775  continue;
1776  R.insert(I.getSUnit());
1777  }
1778  // Back-edges are predecessors with an anti-dependence.
1779  for (const auto &I : maxDepth->Succs) {
1780  if (I.getKind() != SDep::Anti)
1781  continue;
1782  if (Nodes.count(I.getSUnit()) == 0)
1783  continue;
1784  if (NodeOrder.count(I.getSUnit()) != 0)
1785  continue;
1786  R.insert(I.getSUnit());
1787  }
1788  }
1789  Order = TopDown;
1790  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1792  if (succ_L(NodeOrder, N, &Nodes))
1793  R.insert(N.begin(), N.end());
1794  }
1795  }
1796  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1797  }
1798 
1799  LLVM_DEBUG({
1800  dbgs() << "Node order: ";
1801  for (SUnit *I : NodeOrder)
1802  dbgs() << " " << I->NodeNum << " ";
1803  dbgs() << "\n";
1804  });
1805 }
1806 
1807 /// Process the nodes in the computed order and create the pipelined schedule
1808 /// of the instructions, if possible. Return true if a schedule is found.
1809 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1810  if (NodeOrder.empty())
1811  return false;
1812 
1813  bool scheduleFound = false;
1814  unsigned II = 0;
1815  // Keep increasing II until a valid schedule is found.
1816  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
1817  Schedule.reset();
1818  Schedule.setInitiationInterval(II);
1819  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1820 
1823  do {
1824  SUnit *SU = *NI;
1825 
1826  // Compute the schedule time for the instruction, which is based
1827  // upon the scheduled time for any predecessors/successors.
1828  int EarlyStart = INT_MIN;
1829  int LateStart = INT_MAX;
1830  // These values are set when the size of the schedule window is limited
1831  // due to chain dependences.
1832  int SchedEnd = INT_MAX;
1833  int SchedStart = INT_MIN;
1834  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1835  II, this);
1836  LLVM_DEBUG({
1837  dbgs() << "Inst (" << SU->NodeNum << ") ";
1838  SU->getInstr()->dump();
1839  dbgs() << "\n";
1840  });
1841  LLVM_DEBUG({
1842  dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
1843  << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
1844  });
1845 
1846  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
1847  SchedStart > LateStart)
1848  scheduleFound = false;
1849  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
1850  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
1851  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1852  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
1853  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
1854  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
1855  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
1856  SchedEnd =
1857  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
1858  // When scheduling a Phi it is better to start at the late cycle and go
1859  // backwards. The default order may insert the Phi too far away from
1860  // its first dependence.
1861  if (SU->getInstr()->isPHI())
1862  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
1863  else
1864  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
1865  } else {
1866  int FirstCycle = Schedule.getFirstCycle();
1867  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
1868  FirstCycle + getASAP(SU) + II - 1, II);
1869  }
1870  // Even if we find a schedule, make sure the schedule doesn't exceed the
1871  // allowable number of stages. We keep trying if this happens.
1872  if (scheduleFound)
1873  if (SwpMaxStages > -1 &&
1874  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
1875  scheduleFound = false;
1876 
1877  LLVM_DEBUG({
1878  if (!scheduleFound)
1879  dbgs() << "\tCan't schedule\n";
1880  });
1881  } while (++NI != NE && scheduleFound);
1882 
1883  // If a schedule is found, check if it is a valid schedule too.
1884  if (scheduleFound)
1885  scheduleFound = Schedule.isValidSchedule(this);
1886  }
1887 
1888  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
1889  << ")\n");
1890 
1891  if (scheduleFound)
1892  Schedule.finalizeSchedule(this);
1893  else
1894  Schedule.reset();
1895 
1896  return scheduleFound && Schedule.getMaxStageCount() > 0;
1897 }
1898 
1899 /// Given a schedule for the loop, generate a new version of the loop,
1900 /// and replace the old version. This function generates a prolog
1901 /// that contains the initial iterations in the pipeline, and kernel
1902 /// loop, and the epilogue that contains the code for the final
1903 /// iterations.
1904 void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
1905  // Create a new basic block for the kernel and add it to the CFG.
1906  MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1907 
1908  unsigned MaxStageCount = Schedule.getMaxStageCount();
1909 
1910  // Remember the registers that are used in different stages. The index is
1911  // the iteration, or stage, that the instruction is scheduled in. This is
1912  // a map between register names in the original block and the names created
1913  // in each stage of the pipelined loop.
1914  ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
1915  InstrMapTy InstrMap;
1916 
1918  // Generate the prolog instructions that set up the pipeline.
1919  generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
1920  MF.insert(BB->getIterator(), KernelBB);
1921 
1922  // Rearrange the instructions to generate the new, pipelined loop,
1923  // and update register names as needed.
1924  for (int Cycle = Schedule.getFirstCycle(),
1925  LastCycle = Schedule.getFinalCycle();
1926  Cycle <= LastCycle; ++Cycle) {
1927  std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
1928  // This inner loop schedules each instruction in the cycle.
1929  for (SUnit *CI : CycleInstrs) {
1930  if (CI->getInstr()->isPHI())
1931  continue;
1932  unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
1933  MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
1934  updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
1935  KernelBB->push_back(NewMI);
1936  InstrMap[NewMI] = CI->getInstr();
1937  }
1938  }
1939 
1940  // Copy any terminator instructions to the new kernel, and update
1941  // names as needed.
1942  for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
1943  E = BB->instr_end();
1944  I != E; ++I) {
1945  MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
1946  updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
1947  KernelBB->push_back(NewMI);
1948  InstrMap[NewMI] = &*I;
1949  }
1950 
1951  KernelBB->transferSuccessors(BB);
1952  KernelBB->replaceSuccessor(BB, KernelBB);
1953 
1954  generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
1955  VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
1956  generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
1957  InstrMap, MaxStageCount, MaxStageCount, false);
1958 
1959  LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
1960 
1962  // Generate the epilog instructions to complete the pipeline.
1963  generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
1964  PrologBBs);
1965 
1966  // We need this step because the register allocation doesn't handle some
1967  // situations well, so we insert copies to help out.
1968  splitLifetimes(KernelBB, EpilogBBs, Schedule);
1969 
1970  // Remove dead instructions due to loop induction variables.
1971  removeDeadInstructions(KernelBB, EpilogBBs);
1972 
1973  // Add branches between prolog and epilog blocks.
1974  addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
1975 
1976  // Remove the original loop since it's no longer referenced.
1977  for (auto &I : *BB)
1978  LIS.RemoveMachineInstrFromMaps(I);
1979  BB->clear();
1980  BB->eraseFromParent();
1981 
1982  delete[] VRMap;
1983 }
1984 
1985 /// Generate the pipeline prolog code.
1986 void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
1987  MachineBasicBlock *KernelBB,
1988  ValueMapTy *VRMap,
1989  MBBVectorTy &PrologBBs) {
1990  MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
1991  assert(PreheaderBB != nullptr &&
1992  "Need to add code to handle loops w/o preheader");
1993  MachineBasicBlock *PredBB = PreheaderBB;
1994  InstrMapTy InstrMap;
1995 
1996  // Generate a basic block for each stage, not including the last stage,
1997  // which will be generated in the kernel. Each basic block may contain
1998  // instructions from multiple stages/iterations.
1999  for (unsigned i = 0; i < LastStage; ++i) {
2000  // Create and insert the prolog basic block prior to the original loop
2001  // basic block. The original loop is removed later.
2002  MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2003  PrologBBs.push_back(NewBB);
2004  MF.insert(BB->getIterator(), NewBB);
2005  NewBB->transferSuccessors(PredBB);
2006  PredBB->addSuccessor(NewBB);
2007  PredBB = NewBB;
2008 
2009  // Generate instructions for each appropriate stage. Process instructions
2010  // in original program order.
2011  for (int StageNum = i; StageNum >= 0; --StageNum) {
2012  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2013  BBE = BB->getFirstTerminator();
2014  BBI != BBE; ++BBI) {
2015  if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
2016  if (BBI->isPHI())
2017  continue;
2018  MachineInstr *NewMI =
2019  cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
2020  updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
2021  VRMap);
2022  NewBB->push_back(NewMI);
2023  InstrMap[NewMI] = &*BBI;
2024  }
2025  }
2026  }
2027  rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
2028  LLVM_DEBUG({
2029  dbgs() << "prolog:\n";
2030  NewBB->dump();
2031  });
2032  }
2033 
2034  PredBB->replaceSuccessor(BB, KernelBB);
2035 
2036  // Check if we need to remove the branch from the preheader to the original
2037  // loop, and replace it with a branch to the new loop.
2038  unsigned numBranches = TII->removeBranch(*PreheaderBB);
2039  if (numBranches) {
2041  TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
2042  }
2043 }
2044 
2045 /// Generate the pipeline epilog code. The epilog code finishes the iterations
2046 /// that were started in either the prolog or the kernel. We create a basic
2047 /// block for each stage that needs to complete.
2048 void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
2049  MachineBasicBlock *KernelBB,
2050  ValueMapTy *VRMap,
2051  MBBVectorTy &EpilogBBs,
2052  MBBVectorTy &PrologBBs) {
2053  // We need to change the branch from the kernel to the first epilog block, so
2054  // this call to analyze branch uses the kernel rather than the original BB.
2055  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2057  bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2058  assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2059  if (checkBranch)
2060  return;
2061 
2062  MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2063  if (*LoopExitI == KernelBB)
2064  ++LoopExitI;
2065  assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2066  MachineBasicBlock *LoopExitBB = *LoopExitI;
2067 
2068  MachineBasicBlock *PredBB = KernelBB;
2069  MachineBasicBlock *EpilogStart = LoopExitBB;
2070  InstrMapTy InstrMap;
2071 
2072  // Generate a basic block for each stage, not including the last stage,
2073  // which was generated for the kernel. Each basic block may contain
2074  // instructions from multiple stages/iterations.
2075  int EpilogStage = LastStage + 1;
2076  for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2078  EpilogBBs.push_back(NewBB);
2079  MF.insert(BB->getIterator(), NewBB);
2080 
2081  PredBB->replaceSuccessor(LoopExitBB, NewBB);
2082  NewBB->addSuccessor(LoopExitBB);
2083 
2084  if (EpilogStart == LoopExitBB)
2085  EpilogStart = NewBB;
2086 
2087  // Add instructions to the epilog depending on the current block.
2088  // Process instructions in original program order.
2089  for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2090  for (auto &BBI : *BB) {
2091  if (BBI.isPHI())
2092  continue;
2093  MachineInstr *In = &BBI;
2094  if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
2095  // Instructions with memoperands in the epilog are updated with
2096  // conservative values.
2097  MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
2098  updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
2099  NewBB->push_back(NewMI);
2100  InstrMap[NewMI] = In;
2101  }
2102  }
2103  }
2104  generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
2105  VRMap, InstrMap, LastStage, EpilogStage, i == 1);
2106  generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
2107  InstrMap, LastStage, EpilogStage, i == 1);
2108  PredBB = NewBB;
2109 
2110  LLVM_DEBUG({
2111  dbgs() << "epilog:\n";
2112  NewBB->dump();
2113  });
2114  }
2115 
2116  // Fix any Phi nodes in the loop exit block.
2117  for (MachineInstr &MI : *LoopExitBB) {
2118  if (!MI.isPHI())
2119  break;
2120  for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
2121  MachineOperand &MO = MI.getOperand(i);
2122  if (MO.getMBB() == BB)
2123  MO.setMBB(PredBB);
2124  }
2125  }
2126 
2127  // Create a branch to the new epilog from the kernel.
2128  // Remove the original branch and add a new branch to the epilog.
2129  TII->removeBranch(*KernelBB);
2130  TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
2131  // Add a branch to the loop exit.
2132  if (EpilogBBs.size() > 0) {
2133  MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
2135  TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
2136  }
2137 }
2138 
2139 /// Replace all uses of FromReg that appear outside the specified
2140 /// basic block with ToReg.
2141 static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
2142  MachineBasicBlock *MBB,
2144  LiveIntervals &LIS) {
2145  for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
2146  E = MRI.use_end();
2147  I != E;) {
2148  MachineOperand &O = *I;
2149  ++I;
2150  if (O.getParent()->getParent() != MBB)
2151  O.setReg(ToReg);
2152  }
2153  if (!LIS.hasInterval(ToReg))
2154  LIS.createEmptyInterval(ToReg);
2155 }
2156 
2157 /// Return true if the register has a use that occurs outside the
2158 /// specified loop.
2159 static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
2162  E = MRI.use_end();
2163  I != E; ++I)
2164  if (I->getParent()->getParent() != BB)
2165  return true;
2166  return false;
2167 }
2168 
2169 /// Generate Phis for the specific block in the generated pipelined code.
2170 /// This function looks at the Phis from the original code to guide the
2171 /// creation of new Phis.
2172 void SwingSchedulerDAG::generateExistingPhis(
2174  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2175  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2176  bool IsLast) {
2177  // Compute the stage number for the initial value of the Phi, which
2178  // comes from the prolog. The prolog to use depends on to which kernel/
2179  // epilog that we're adding the Phi.
2180  unsigned PrologStage = 0;
2181  unsigned PrevStage = 0;
2182  bool InKernel = (LastStageNum == CurStageNum);
2183  if (InKernel) {
2184  PrologStage = LastStageNum - 1;
2185  PrevStage = CurStageNum;
2186  } else {
2187  PrologStage = LastStageNum - (CurStageNum - LastStageNum);
2188  PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
2189  }
2190 
2191  for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2192  BBE = BB->getFirstNonPHI();
2193  BBI != BBE; ++BBI) {
2194  unsigned Def = BBI->getOperand(0).getReg();
2195 
2196  unsigned InitVal = 0;
2197  unsigned LoopVal = 0;
2198  getPhiRegs(*BBI, BB, InitVal, LoopVal);
2199 
2200  unsigned PhiOp1 = 0;
2201  // The Phi value from the loop body typically is defined in the loop, but
2202  // not always. So, we need to check if the value is defined in the loop.
2203  unsigned PhiOp2 = LoopVal;
2204  if (VRMap[LastStageNum].count(LoopVal))
2205  PhiOp2 = VRMap[LastStageNum][LoopVal];
2206 
2207  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2208  int LoopValStage =
2209  Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2210  unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
2211  if (NumStages == 0) {
2212  // We don't need to generate a Phi anymore, but we need to rename any uses
2213  // of the Phi value.
2214  unsigned NewReg = VRMap[PrevStage][LoopVal];
2215  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
2216  Def, InitVal, NewReg);
2217  if (VRMap[CurStageNum].count(LoopVal))
2218  VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
2219  }
2220  // Adjust the number of Phis needed depending on the number of prologs left,
2221  // and the distance from where the Phi is first scheduled. The number of
2222  // Phis cannot exceed the number of prolog stages. Each stage can
2223  // potentially define two values.
2224  unsigned MaxPhis = PrologStage + 2;
2225  if (!InKernel && (int)PrologStage <= LoopValStage)
2226  MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
2227  unsigned NumPhis = std::min(NumStages, MaxPhis);
2228 
2229  unsigned NewReg = 0;
2230  unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
2231  // In the epilog, we may need to look back one stage to get the correct
2232  // Phi name because the epilog and prolog blocks execute the same stage.
2233  // The correct name is from the previous block only when the Phi has
2234  // been completely scheduled prior to the epilog, and Phi value is not
2235  // needed in multiple stages.
2236  int StageDiff = 0;
2237  if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
2238  NumPhis == 1)
2239  StageDiff = 1;
2240  // Adjust the computations below when the phi and the loop definition
2241  // are scheduled in different stages.
2242  if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
2243  StageDiff = StageScheduled - LoopValStage;
2244  for (unsigned np = 0; np < NumPhis; ++np) {
2245  // If the Phi hasn't been scheduled, then use the initial Phi operand
2246  // value. Otherwise, use the scheduled version of the instruction. This
2247  // is a little complicated when a Phi references another Phi.
2248  if (np > PrologStage || StageScheduled >= (int)LastStageNum)
2249  PhiOp1 = InitVal;
2250  // Check if the Phi has already been scheduled in a prolog stage.
2251  else if (PrologStage >= AccessStage + StageDiff + np &&
2252  VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
2253  PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
2254  // Check if the Phi has already been scheduled, but the loop instruction
2255  // is either another Phi, or doesn't occur in the loop.
2256  else if (PrologStage >= AccessStage + StageDiff + np) {
2257  // If the Phi references another Phi, we need to examine the other
2258  // Phi to get the correct value.
2259  PhiOp1 = LoopVal;
2260  MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
2261  int Indirects = 1;
2262  while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
2263  int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
2264  if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
2265  PhiOp1 = getInitPhiReg(*InstOp1, BB);
2266  else
2267  PhiOp1 = getLoopPhiReg(*InstOp1, BB);
2268  InstOp1 = MRI.getVRegDef(PhiOp1);
2269  int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
2270  int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
2271  if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
2272  VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
2273  PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
2274  break;
2275  }
2276  ++Indirects;
2277  }
2278  } else
2279  PhiOp1 = InitVal;
2280  // If this references a generated Phi in the kernel, get the Phi operand
2281  // from the incoming block.
2282  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
2283  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2284  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2285 
2286  MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
2287  bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
2288  // In the epilog, a map lookup is needed to get the value from the kernel,
2289  // or previous epilog block. How is does this depends on if the
2290  // instruction is scheduled in the previous block.
2291  if (!InKernel) {
2292  int StageDiffAdj = 0;
2293  if (LoopValStage != -1 && StageScheduled > LoopValStage)
2294  StageDiffAdj = StageScheduled - LoopValStage;
2295  // Use the loop value defined in the kernel, unless the kernel
2296  // contains the last definition of the Phi.
2297  if (np == 0 && PrevStage == LastStageNum &&
2298  (StageScheduled != 0 || LoopValStage != 0) &&
2299  VRMap[PrevStage - StageDiffAdj].count(LoopVal))
2300  PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
2301  // Use the value defined by the Phi. We add one because we switch
2302  // from looking at the loop value to the Phi definition.
2303  else if (np > 0 && PrevStage == LastStageNum &&
2304  VRMap[PrevStage - np + 1].count(Def))
2305  PhiOp2 = VRMap[PrevStage - np + 1][Def];
2306  // Use the loop value defined in the kernel.
2307  else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
2308  VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
2309  PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
2310  // Use the value defined by the Phi, unless we're generating the first
2311  // epilog and the Phi refers to a Phi in a different stage.
2312  else if (VRMap[PrevStage - np].count(Def) &&
2313  (!LoopDefIsPhi || PrevStage != LastStageNum))
2314  PhiOp2 = VRMap[PrevStage - np][Def];
2315  }
2316 
2317  // Check if we can reuse an existing Phi. This occurs when a Phi
2318  // references another Phi, and the other Phi is scheduled in an
2319  // earlier stage. We can try to reuse an existing Phi up until the last
2320  // stage of the current Phi.
2321  if (LoopDefIsPhi) {
2322  if (static_cast<int>(PrologStage - np) >= StageScheduled) {
2323  int LVNumStages = Schedule.getStagesForPhi(LoopVal);
2324  int StageDiff = (StageScheduled - LoopValStage);
2325  LVNumStages -= StageDiff;
2326  // Make sure the loop value Phi has been processed already.
2327  if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
2328  NewReg = PhiOp2;
2329  unsigned ReuseStage = CurStageNum;
2330  if (Schedule.isLoopCarried(this, *PhiInst))
2331  ReuseStage -= LVNumStages;
2332  // Check if the Phi to reuse has been generated yet. If not, then
2333  // there is nothing to reuse.
2334  if (VRMap[ReuseStage - np].count(LoopVal)) {
2335  NewReg = VRMap[ReuseStage - np][LoopVal];
2336 
2337  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2338  &*BBI, Def, NewReg);
2339  // Update the map with the new Phi name.
2340  VRMap[CurStageNum - np][Def] = NewReg;
2341  PhiOp2 = NewReg;
2342  if (VRMap[LastStageNum - np - 1].count(LoopVal))
2343  PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
2344 
2345  if (IsLast && np == NumPhis - 1)
2346  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2347  continue;
2348  }
2349  }
2350  }
2351  if (InKernel && StageDiff > 0 &&
2352  VRMap[CurStageNum - StageDiff - np].count(LoopVal))
2353  PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
2354  }
2355 
2356  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2357  NewReg = MRI.createVirtualRegister(RC);
2358 
2359  MachineInstrBuilder NewPhi =
2360  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2361  TII->get(TargetOpcode::PHI), NewReg);
2362  NewPhi.addReg(PhiOp1).addMBB(BB1);
2363  NewPhi.addReg(PhiOp2).addMBB(BB2);
2364  if (np == 0)
2365  InstrMap[NewPhi] = &*BBI;
2366 
2367  // We define the Phis after creating the new pipelined code, so
2368  // we need to rename the Phi values in scheduled instructions.
2369 
2370  unsigned PrevReg = 0;
2371  if (InKernel && VRMap[PrevStage - np].count(LoopVal))
2372  PrevReg = VRMap[PrevStage - np][LoopVal];
2373  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2374  Def, NewReg, PrevReg);
2375  // If the Phi has been scheduled, use the new name for rewriting.
2376  if (VRMap[CurStageNum - np].count(Def)) {
2377  unsigned R = VRMap[CurStageNum - np][Def];
2378  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
2379  R, NewReg);
2380  }
2381 
2382  // Check if we need to rename any uses that occurs after the loop. The
2383  // register to replace depends on whether the Phi is scheduled in the
2384  // epilog.
2385  if (IsLast && np == NumPhis - 1)
2386  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2387 
2388  // In the kernel, a dependent Phi uses the value from this Phi.
2389  if (InKernel)
2390  PhiOp2 = NewReg;
2391 
2392  // Update the map with the new Phi name.
2393  VRMap[CurStageNum - np][Def] = NewReg;
2394  }
2395 
2396  while (NumPhis++ < NumStages) {
2397  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
2398  &*BBI, Def, NewReg, 0);
2399  }
2400 
2401  // Check if we need to rename a Phi that has been eliminated due to
2402  // scheduling.
2403  if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
2404  replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
2405  }
2406 }
2407 
2408 /// Generate Phis for the specified block in the generated pipelined code.
2409 /// These are new Phis needed because the definition is scheduled after the
2410 /// use in the pipelined sequence.
2411 void SwingSchedulerDAG::generatePhis(
2413  MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
2414  InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
2415  bool IsLast) {
2416  // Compute the stage number that contains the initial Phi value, and
2417  // the Phi from the previous stage.
2418  unsigned PrologStage = 0;
2419  unsigned PrevStage = 0;
2420  unsigned StageDiff = CurStageNum - LastStageNum;
2421  bool InKernel = (StageDiff == 0);
2422  if (InKernel) {
2423  PrologStage = LastStageNum - 1;
2424  PrevStage = CurStageNum;
2425  } else {
2426  PrologStage = LastStageNum - StageDiff;
2427  PrevStage = LastStageNum + StageDiff - 1;
2428  }
2429 
2430  for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
2431  BBE = BB->instr_end();
2432  BBI != BBE; ++BBI) {
2433  for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
2434  MachineOperand &MO = BBI->getOperand(i);
2435  if (!MO.isReg() || !MO.isDef() ||
2437  continue;
2438 
2439  int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
2440  assert(StageScheduled != -1 && "Expecting scheduled instruction.");
2441  unsigned Def = MO.getReg();
2442  unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
2443  // An instruction scheduled in stage 0 and is used after the loop
2444  // requires a phi in the epilog for the last definition from either
2445  // the kernel or prolog.
2446  if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
2447  hasUseAfterLoop(Def, BB, MRI))
2448  NumPhis = 1;
2449  if (!InKernel && (unsigned)StageScheduled > PrologStage)
2450  continue;
2451 
2452  unsigned PhiOp2 = VRMap[PrevStage][Def];
2453  if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
2454  if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
2455  PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
2456  // The number of Phis can't exceed the number of prolog stages. The
2457  // prolog stage number is zero based.
2458  if (NumPhis > PrologStage + 1 - StageScheduled)
2459  NumPhis = PrologStage + 1 - StageScheduled;
2460  for (unsigned np = 0; np < NumPhis; ++np) {
2461  unsigned PhiOp1 = VRMap[PrologStage][Def];
2462  if (np <= PrologStage)
2463  PhiOp1 = VRMap[PrologStage - np][Def];
2464  if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
2465  if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
2466  PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
2467  if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
2468  PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
2469  }
2470  if (!InKernel)
2471  PhiOp2 = VRMap[PrevStage - np][Def];
2472 
2473  const TargetRegisterClass *RC = MRI.getRegClass(Def);
2474  unsigned NewReg = MRI.createVirtualRegister(RC);
2475 
2476  MachineInstrBuilder NewPhi =
2477  BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
2478  TII->get(TargetOpcode::PHI), NewReg);
2479  NewPhi.addReg(PhiOp1).addMBB(BB1);
2480  NewPhi.addReg(PhiOp2).addMBB(BB2);
2481  if (np == 0)
2482  InstrMap[NewPhi] = &*BBI;
2483 
2484  // Rewrite uses and update the map. The actions depend upon whether
2485  // we generating code for the kernel or epilog blocks.
2486  if (InKernel) {
2487  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2488  &*BBI, PhiOp1, NewReg);
2489  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2490  &*BBI, PhiOp2, NewReg);
2491 
2492  PhiOp2 = NewReg;
2493  VRMap[PrevStage - np - 1][Def] = NewReg;
2494  } else {
2495  VRMap[CurStageNum - np][Def] = NewReg;
2496  if (np == NumPhis - 1)
2497  rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
2498  &*BBI, Def, NewReg);
2499  }
2500  if (IsLast && np == NumPhis - 1)
2501  replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
2502  }
2503  }
2504  }
2505 }
2506 
2507 /// Remove instructions that generate values with no uses.
2508 /// Typically, these are induction variable operations that generate values
2509 /// used in the loop itself. A dead instruction has a definition with
2510 /// no uses, or uses that occur in the original loop only.
2511 void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
2512  MBBVectorTy &EpilogBBs) {
2513  // For each epilog block, check that the value defined by each instruction
2514  // is used. If not, delete it.
2515  for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
2516  MBE = EpilogBBs.rend();
2517  MBB != MBE; ++MBB)
2518  for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
2519  ME = (*MBB)->instr_rend();
2520  MI != ME;) {
2521  // From DeadMachineInstructionElem. Don't delete inline assembly.
2522  if (MI->isInlineAsm()) {
2523  ++MI;
2524  continue;
2525  }
2526  bool SawStore = false;
2527  // Check if it's safe to remove the instruction due to side effects.
2528  // We can, and want to, remove Phis here.
2529  if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
2530  ++MI;
2531  continue;
2532  }
2533  bool used = true;
2534  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
2535  MOE = MI->operands_end();
2536  MOI != MOE; ++MOI) {
2537  if (!MOI->isReg() || !MOI->isDef())
2538  continue;
2539  unsigned reg = MOI->getReg();
2540  // Assume physical registers are used, unless they are marked dead.
2542  used = !MOI->isDead();
2543  if (used)
2544  break;
2545  continue;
2546  }
2547  unsigned realUses = 0;
2548  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
2549  EI = MRI.use_end();
2550  UI != EI; ++UI) {
2551  // Check if there are any uses that occur only in the original
2552  // loop. If so, that's not a real use.
2553  if (UI->getParent()->getParent() != BB) {
2554  realUses++;
2555  used = true;
2556  break;
2557  }
2558  }
2559  if (realUses > 0)
2560  break;
2561  used = false;
2562  }
2563  if (!used) {
2564  LIS.RemoveMachineInstrFromMaps(*MI);
2565  MI++->eraseFromParent();
2566  continue;
2567  }
2568  ++MI;
2569  }
2570  // In the kernel block, check if we can remove a Phi that generates a value
2571  // used in an instruction removed in the epilog block.
2572  for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
2573  BBE = KernelBB->getFirstNonPHI();
2574  BBI != BBE;) {
2575  MachineInstr *MI = &*BBI;
2576  ++BBI;
2577  unsigned reg = MI->getOperand(0).getReg();
2578  if (MRI.use_begin(reg) == MRI.use_end()) {
2579  LIS.RemoveMachineInstrFromMaps(*MI);
2580  MI->eraseFromParent();
2581  }
2582  }
2583 }
2584 
2585 /// For loop carried definitions, we split the lifetime of a virtual register
2586 /// that has uses past the definition in the next iteration. A copy with a new
2587 /// virtual register is inserted before the definition, which helps with
2588 /// generating a better register assignment.
2589 ///
2590 /// v1 = phi(a, v2) v1 = phi(a, v2)
2591 /// v2 = phi(b, v3) v2 = phi(b, v3)
2592 /// v3 = .. v4 = copy v1
2593 /// .. = V1 v3 = ..
2594 /// .. = v4
2595 void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
2596  MBBVectorTy &EpilogBBs,
2597  SMSchedule &Schedule) {
2599  for (auto &PHI : KernelBB->phis()) {
2600  unsigned Def = PHI.getOperand(0).getReg();
2601  // Check for any Phi definition that used as an operand of another Phi
2602  // in the same block.
2603  for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
2604  E = MRI.use_instr_end();
2605  I != E; ++I) {
2606  if (I->isPHI() && I->getParent() == KernelBB) {
2607  // Get the loop carried definition.
2608  unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
2609  if (!LCDef)
2610  continue;
2611  MachineInstr *MI = MRI.getVRegDef(LCDef);
2612  if (!MI || MI->getParent() != KernelBB || MI->isPHI())
2613  continue;
2614  // Search through the rest of the block looking for uses of the Phi
2615  // definition. If one occurs, then split the lifetime.
2616  unsigned SplitReg = 0;
2617  for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
2618  KernelBB->instr_end()))
2619  if (BBJ.readsRegister(Def)) {
2620  // We split the lifetime when we find the first use.
2621  if (SplitReg == 0) {
2622  SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
2623  BuildMI(*KernelBB, MI, MI->getDebugLoc(),
2624  TII->get(TargetOpcode::COPY), SplitReg)
2625  .addReg(Def);
2626  }
2627  BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
2628  }
2629  if (!SplitReg)
2630  continue;
2631  // Search through each of the epilog blocks for any uses to be renamed.
2632  for (auto &Epilog : EpilogBBs)
2633  for (auto &I : *Epilog)
2634  if (I.readsRegister(Def))
2635  I.substituteRegister(Def, SplitReg, 0, *TRI);
2636  break;
2637  }
2638  }
2639  }
2640 }
2641 
2642 /// Remove the incoming block from the Phis in a basic block.
2643 static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
2644  for (MachineInstr &MI : *BB) {
2645  if (!MI.isPHI())
2646  break;
2647  for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
2648  if (MI.getOperand(i + 1).getMBB() == Incoming) {
2649  MI.RemoveOperand(i + 1);
2650  MI.RemoveOperand(i);
2651  break;
2652  }
2653  }
2654 }
2655 
2656 /// Create branches from each prolog basic block to the appropriate epilog
2657 /// block. These edges are needed if the loop ends before reaching the
2658 /// kernel.
2659 void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
2660  MachineBasicBlock *KernelBB,
2661  MBBVectorTy &EpilogBBs,
2662  SMSchedule &Schedule, ValueMapTy *VRMap) {
2663  assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
2664  MachineInstr *IndVar = Pass.LI.LoopInductionVar;
2665  MachineInstr *Cmp = Pass.LI.LoopCompare;
2666  MachineBasicBlock *LastPro = KernelBB;
2667  MachineBasicBlock *LastEpi = KernelBB;
2668 
2669  // Start from the blocks connected to the kernel and work "out"
2670  // to the first prolog and the last epilog blocks.
2672  unsigned MaxIter = PrologBBs.size() - 1;
2673  unsigned LC = UINT_MAX;
2674  unsigned LCMin = UINT_MAX;
2675  for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
2676  // Add branches to the prolog that go to the corresponding
2677  // epilog, and the fall-thru prolog/kernel block.
2678  MachineBasicBlock *Prolog = PrologBBs[j];
2679  MachineBasicBlock *Epilog = EpilogBBs[i];
2680  // We've executed one iteration, so decrement the loop count and check for
2681  // the loop end.
2683  // Check if the LOOP0 has already been removed. If so, then there is no need
2684  // to reduce the trip count.
2685  if (LC != 0)
2686  LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
2687  MaxIter);
2688 
2689  // Record the value of the first trip count, which is used to determine if
2690  // branches and blocks can be removed for constant trip counts.
2691  if (LCMin == UINT_MAX)
2692  LCMin = LC;
2693 
2694  unsigned numAdded = 0;
2696  Prolog->addSuccessor(Epilog);
2697  numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
2698  } else if (j >= LCMin) {
2699  Prolog->addSuccessor(Epilog);
2700  Prolog->removeSuccessor(LastPro);
2701  LastEpi->removeSuccessor(Epilog);
2702  numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
2703  removePhis(Epilog, LastEpi);
2704  // Remove the blocks that are no longer referenced.
2705  if (LastPro != LastEpi) {
2706  LastEpi->clear();
2707  LastEpi->eraseFromParent();
2708  }
2709  LastPro->clear();
2710  LastPro->eraseFromParent();
2711  } else {
2712  numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
2713  removePhis(Epilog, Prolog);
2714  }
2715  LastPro = Prolog;
2716  LastEpi = Epilog;
2718  E = Prolog->instr_rend();
2719  I != E && numAdded > 0; ++I, --numAdded)
2720  updateInstruction(&*I, false, j, 0, Schedule, VRMap);
2721  }
2722 }
2723 
2724 /// Return true if we can compute the amount the instruction changes
2725 /// during each iteration. Set Delta to the amount of the change.
2726 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2728  MachineOperand *BaseOp;
2729  int64_t Offset;
2730  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2731  return false;
2732 
2733  if (!BaseOp->isReg())
2734  return false;
2735 
2736  unsigned BaseReg = BaseOp->getReg();
2737 
2739  // Check if there is a Phi. If so, get the definition in the loop.
2740  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2741  if (BaseDef && BaseDef->isPHI()) {
2742  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2743  BaseDef = MRI.getVRegDef(BaseReg);
2744  }
2745  if (!BaseDef)
2746  return false;
2747 
2748  int D = 0;
2749  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2750  return false;
2751 
2752  Delta = D;
2753  return true;
2754 }
2755 
2756 /// Update the memory operand with a new offset when the pipeliner
2757 /// generates a new copy of the instruction that refers to a
2758 /// different memory location.
2759 void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
2760  MachineInstr &OldMI, unsigned Num) {
2761  if (Num == 0)
2762  return;
2763  // If the instruction has memory operands, then adjust the offset
2764  // when the instruction appears in different stages.
2765  if (NewMI.memoperands_empty())
2766  return;
2768  for (MachineMemOperand *MMO : NewMI.memoperands()) {
2769  // TODO: Figure out whether isAtomic is really necessary (see D57601).
2770  if (MMO->isVolatile() || MMO->isAtomic() ||
2771  (MMO->isInvariant() && MMO->isDereferenceable()) ||
2772  (!MMO->getValue())) {
2773  NewMMOs.push_back(MMO);
2774  continue;
2775  }
2776  unsigned Delta;
2777  if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
2778  int64_t AdjOffset = Delta * Num;
2779  NewMMOs.push_back(
2780  MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
2781  } else {
2782  NewMMOs.push_back(
2784  }
2785  }
2786  NewMI.setMemRefs(MF, NewMMOs);
2787 }
2788 
2789 /// Clone the instruction for the new pipelined loop and update the
2790 /// memory operands, if needed.
2791 MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
2792  unsigned CurStageNum,
2793  unsigned InstStageNum) {
2794  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2795  // Check for tied operands in inline asm instructions. This should be handled
2796  // elsewhere, but I'm not sure of the best solution.
2797  if (OldMI->isInlineAsm())
2798  for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
2799  const auto &MO = OldMI->getOperand(i);
2800  if (MO.isReg() && MO.isUse())
2801  break;
2802  unsigned UseIdx;
2803  if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
2804  NewMI->tieOperands(i, UseIdx);
2805  }
2806  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2807  return NewMI;
2808 }
2809 
2810 /// Clone the instruction for the new pipelined loop. If needed, this
2811 /// function updates the instruction using the values saved in the
2812 /// InstrChanges structure.
2813 MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
2814  unsigned CurStageNum,
2815  unsigned InstStageNum,
2816  SMSchedule &Schedule) {
2817  MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2819  InstrChanges.find(getSUnit(OldMI));
2820  if (It != InstrChanges.end()) {
2821  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2822  unsigned BasePos, OffsetPos;
2823  if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
2824  return nullptr;
2825  int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
2826  MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
2827  if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
2828  NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
2829  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2830  }
2831  updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
2832  return NewMI;
2833 }
2834 
2835 /// Update the machine instruction with new virtual registers. This
2836 /// function may change the defintions and/or uses.
2837 void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
2838  unsigned CurStageNum,
2839  unsigned InstrStageNum,
2840  SMSchedule &Schedule,
2841  ValueMapTy *VRMap) {
2842  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
2843  MachineOperand &MO = NewMI->getOperand(i);
2845  continue;
2846  unsigned reg = MO.getReg();
2847  if (MO.isDef()) {
2848  // Create a new virtual register for the definition.
2849  const TargetRegisterClass *RC = MRI.getRegClass(reg);
2850  unsigned NewReg = MRI.createVirtualRegister(RC);
2851  MO.setReg(NewReg);
2852  VRMap[CurStageNum][reg] = NewReg;
2853  if (LastDef)
2854  replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
2855  } else if (MO.isUse()) {
2856  MachineInstr *Def = MRI.getVRegDef(reg);
2857  // Compute the stage that contains the last definition for instruction.
2858  int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
2859  unsigned StageNum = CurStageNum;
2860  if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
2861  // Compute the difference in stages between the defintion and the use.
2862  unsigned StageDiff = (InstrStageNum - DefStageNum);
2863  // Make an adjustment to get the last definition.
2864  StageNum -= StageDiff;
2865  }
2866  if (VRMap[StageNum].count(reg))
2867  MO.setReg(VRMap[StageNum][reg]);
2868  }
2869  }
2870 }
2871 
2872 /// Return the instruction in the loop that defines the register.
2873 /// If the definition is a Phi, then follow the Phi operand to
2874 /// the instruction in the loop.
2875 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2877  MachineInstr *Def = MRI.getVRegDef(Reg);
2878  while (Def->isPHI()) {
2879  if (!Visited.insert(Def).second)
2880  break;
2881  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2882  if (Def->getOperand(i + 1).getMBB() == BB) {
2883  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2884  break;
2885  }
2886  }
2887  return Def;
2888 }
2889 
2890 /// Return the new name for the value from the previous stage.
2891 unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
2892  unsigned LoopVal, unsigned LoopStage,
2893  ValueMapTy *VRMap,
2894  MachineBasicBlock *BB) {
2895  unsigned PrevVal = 0;
2896  if (StageNum > PhiStage) {
2897  MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
2898  if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
2899  // The name is defined in the previous stage.
2900  PrevVal = VRMap[StageNum - 1][LoopVal];
2901  else if (VRMap[StageNum].count(LoopVal))
2902  // The previous name is defined in the current stage when the instruction
2903  // order is swapped.
2904  PrevVal = VRMap[StageNum][LoopVal];
2905  else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
2906  // The loop value hasn't yet been scheduled.
2907  PrevVal = LoopVal;
2908  else if (StageNum == PhiStage + 1)
2909  // The loop value is another phi, which has not been scheduled.
2910  PrevVal = getInitPhiReg(*LoopInst, BB);
2911  else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
2912  // The loop value is another phi, which has been scheduled.
2913  PrevVal =
2914  getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
2915  LoopStage, VRMap, BB);
2916  }
2917  return PrevVal;
2918 }
2919 
2920 /// Rewrite the Phi values in the specified block to use the mappings
2921 /// from the initial operand. Once the Phi is scheduled, we switch
2922 /// to using the loop value instead of the Phi value, so those names
2923 /// do not need to be rewritten.
2924 void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
2925  unsigned StageNum,
2926  SMSchedule &Schedule,
2927  ValueMapTy *VRMap,
2928  InstrMapTy &InstrMap) {
2929  for (auto &PHI : BB->phis()) {
2930  unsigned InitVal = 0;
2931  unsigned LoopVal = 0;
2932  getPhiRegs(PHI, BB, InitVal, LoopVal);
2933  unsigned PhiDef = PHI.getOperand(0).getReg();
2934 
2935  unsigned PhiStage =
2936  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
2937  unsigned LoopStage =
2938  (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
2939  unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
2940  if (NumPhis > StageNum)
2941  NumPhis = StageNum;
2942  for (unsigned np = 0; np <= NumPhis; ++np) {
2943  unsigned NewVal =
2944  getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
2945  if (!NewVal)
2946  NewVal = InitVal;
2947  rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
2948  PhiDef, NewVal);
2949  }
2950  }
2951 }
2952 
2953 /// Rewrite a previously scheduled instruction to use the register value
2954 /// from the new instruction. Make sure the instruction occurs in the
2955 /// basic block, and we don't change the uses in the new instruction.
2956 void SwingSchedulerDAG::rewriteScheduledInstr(
2957  MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
2958  unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
2959  unsigned NewReg, unsigned PrevReg) {
2960  bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
2961  int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
2962  // Rewrite uses that have been scheduled already to use the new
2963  // Phi register.
2964  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
2965  EI = MRI.use_end();
2966  UI != EI;) {
2967  MachineOperand &UseOp = *UI;
2968  MachineInstr *UseMI = UseOp.getParent();
2969  ++UI;
2970  if (UseMI->getParent() != BB)
2971  continue;
2972  if (UseMI->isPHI()) {
2973  if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
2974  continue;
2975  if (getLoopPhiReg(*UseMI, BB) != OldReg)
2976  continue;
2977  }
2978  InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
2979  assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
2980  SUnit *OrigMISU = getSUnit(OrigInstr->second);
2981  int StageSched = Schedule.stageScheduled(OrigMISU);
2982  int CycleSched = Schedule.cycleScheduled(OrigMISU);
2983  unsigned ReplaceReg = 0;
2984  // This is the stage for the scheduled instruction.
2985  if (StagePhi == StageSched && Phi->isPHI()) {
2986  int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
2987  if (PrevReg && InProlog)
2988  ReplaceReg = PrevReg;
2989  else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
2990  (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
2991  ReplaceReg = PrevReg;
2992  else
2993  ReplaceReg = NewReg;
2994  }
2995  // The scheduled instruction occurs before the scheduled Phi, and the
2996  // Phi is not loop carried.
2997  if (!InProlog && StagePhi + 1 == StageSched &&
2998  !Schedule.isLoopCarried(this, *Phi))
2999  ReplaceReg = NewReg;
3000  if (StagePhi > StageSched && Phi->isPHI())
3001  ReplaceReg = NewReg;
3002  if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
3003  ReplaceReg = NewReg;
3004  if (ReplaceReg) {
3005  MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
3006  UseOp.setReg(ReplaceReg);
3007  }
3008  }
3009 }
3010 
3011 /// Check if we can change the instruction to use an offset value from the
3012 /// previous iteration. If so, return true and set the base and offset values
3013 /// so that we can rewrite the load, if necessary.
3014 /// v1 = Phi(v0, v3)
3015 /// v2 = load v1, 0
3016 /// v3 = post_store v1, 4, x
3017 /// This function enables the load to be rewritten as v2 = load v3, 4.
3018 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
3019  unsigned &BasePos,
3020  unsigned &OffsetPos,
3021  unsigned &NewBase,
3022  int64_t &Offset) {
3023  // Get the load instruction.
3024  if (TII->isPostIncrement(*MI))
3025  return false;
3026  unsigned BasePosLd, OffsetPosLd;
3027  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
3028  return false;
3029  unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
3030 
3031  // Look for the Phi instruction.
3033  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
3034  if (!Phi || !Phi->isPHI())
3035  return false;
3036  // Get the register defined in the loop block.
3037  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
3038  if (!PrevReg)
3039  return false;
3040 
3041  // Check for the post-increment load/store instruction.
3042  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
3043  if (!PrevDef || PrevDef == MI)
3044  return false;
3045 
3046  if (!TII->isPostIncrement(*PrevDef))
3047  return false;
3048 
3049  unsigned BasePos1 = 0, OffsetPos1 = 0;
3050  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
3051  return false;
3052 
3053  // Make sure that the instructions do not access the same memory location in
3054  // the next iteration.
3055  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
3056  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
3057  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3058  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
3059  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
3060  MF.DeleteMachineInstr(NewMI);
3061  if (!Disjoint)
3062  return false;
3063 
3064  // Set the return value once we determine that we return true.
3065  BasePos = BasePosLd;
3066  OffsetPos = OffsetPosLd;
3067  NewBase = PrevReg;
3068  Offset = StoreOffset;
3069  return true;
3070 }
3071 
3072 /// Apply changes to the instruction if needed. The changes are need
3073 /// to improve the scheduling and depend up on the final schedule.
3075  SMSchedule &Schedule) {
3076  SUnit *SU = getSUnit(MI);
3078  InstrChanges.find(SU);
3079  if (It != InstrChanges.end()) {
3080  std::pair<unsigned, int64_t> RegAndOffset = It->second;
3081  unsigned BasePos, OffsetPos;
3082  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3083  return;
3084  unsigned BaseReg = MI->getOperand(BasePos).getReg();
3085  MachineInstr *LoopDef = findDefInLoop(BaseReg);
3086  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
3087  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
3088  int BaseStageNum = Schedule.stageScheduled(SU);
3089  int BaseCycleNum = Schedule.cycleScheduled(SU);
3090  if (BaseStageNum < DefStageNum) {
3091  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3092  int OffsetDiff = DefStageNum - BaseStageNum;
3093  if (DefCycleNum < BaseCycleNum) {
3094  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
3095  if (OffsetDiff > 0)
3096  --OffsetDiff;
3097  }
3098  int64_t NewOffset =
3099  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
3100  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3101  SU->setInstr(NewMI);
3102  MISUnitMap[NewMI] = SU;
3103  NewMIs.insert(NewMI);
3104  }
3105  }
3106 }
3107 
3108 /// Return true for an order or output dependence that is loop carried
3109 /// potentially. A dependence is loop carried if the destination defines a valu
3110 /// that may be used or defined by the source in a subsequent iteration.
3112  bool isSucc) {
3113  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
3114  Dep.isArtificial())
3115  return false;
3116 
3117  if (!SwpPruneLoopCarried)
3118  return true;
3119 
3120  if (Dep.getKind() == SDep::Output)
3121  return true;
3122 
3123  MachineInstr *SI = Source->getInstr();
3124  MachineInstr *DI = Dep.getSUnit()->getInstr();
3125  if (!isSucc)
3126  std::swap(SI, DI);
3127  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
3128 
3129  // Assume ordered loads and stores may have a loop carried dependence.
3130  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
3132  return true;
3133 
3134  // Only chain dependences between a load and store can be loop carried.
3135  if (!DI->mayStore() || !SI->mayLoad())
3136  return false;
3137 
3138  unsigned DeltaS, DeltaD;
3139  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
3140  return true;
3141 
3142  MachineOperand *BaseOpS, *BaseOpD;
3143  int64_t OffsetS, OffsetD;
3145  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
3146  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
3147  return true;
3148 
3149  if (!BaseOpS->isIdenticalTo(*BaseOpD))
3150  return true;
3151 
3152  // Check that the base register is incremented by a constant value for each
3153  // iteration.
3154  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
3155  if (!Def || !Def->isPHI())
3156  return true;
3157  unsigned InitVal = 0;
3158  unsigned LoopVal = 0;
3159  getPhiRegs(*Def, BB, InitVal, LoopVal);
3160  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
3161  int D = 0;
3162  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
3163  return true;
3164 
3165  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
3166  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
3167 
3168  // This is the main test, which checks the offset values and the loop
3169  // increment value to determine if the accesses may be loop carried.
3170  if (OffsetS >= OffsetD)
3171  return OffsetS + AccessSizeS > DeltaS;
3172  else
3173  return OffsetD + AccessSizeD > DeltaD;
3174 
3175  return true;
3176 }
3177 
3178 void SwingSchedulerDAG::postprocessDAG() {
3179  for (auto &M : Mutations)
3180  M->apply(this);
3181 }
3182 
3183 /// Try to schedule the node at the specified StartCycle and continue
3184 /// until the node is schedule or the EndCycle is reached. This function
3185 /// returns true if the node is scheduled. This routine may search either
3186 /// forward or backward for a place to insert the instruction based upon
3187 /// the relative values of StartCycle and EndCycle.
3188 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
3189  bool forward = true;
3190  if (StartCycle > EndCycle)
3191  forward = false;
3192 
3193  // The terminating condition depends on the direction.
3194  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
3195  for (int curCycle = StartCycle; curCycle != termCycle;
3196  forward ? ++curCycle : --curCycle) {
3197 
3198  // Add the already scheduled instructions at the specified cycle to the DFA.
3199  Resources->clearResources();
3200  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
3201  checkCycle <= LastCycle; checkCycle += II) {
3202  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
3203 
3204  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
3205  E = cycleInstrs.end();
3206  I != E; ++I) {
3207  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
3208  continue;
3209  assert(Resources->canReserveResources(*(*I)->getInstr()) &&
3210  "These instructions have already been scheduled.");
3211  Resources->reserveResources(*(*I)->getInstr());
3212  }
3213  }
3214  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
3215  Resources->canReserveResources(*SU->getInstr())) {
3216  LLVM_DEBUG({
3217  dbgs() << "\tinsert at cycle " << curCycle << " ";
3218  SU->getInstr()->dump();
3219  });
3220 
3221  ScheduledInstrs[curCycle].push_back(SU);
3222  InstrToCycle.insert(std::make_pair(SU, curCycle));
3223  if (curCycle > LastCycle)
3224  LastCycle = curCycle;
3225  if (curCycle < FirstCycle)
3226  FirstCycle = curCycle;
3227  return true;
3228  }
3229  LLVM_DEBUG({
3230  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
3231  SU->getInstr()->dump();
3232  });
3233  }
3234  return false;
3235 }
3236 
3237 // Return the cycle of the earliest scheduled instruction in the chain.
3239  SmallPtrSet<SUnit *, 8> Visited;
3240  SmallVector<SDep, 8> Worklist;
3241  Worklist.push_back(Dep);
3242  int EarlyCycle = INT_MAX;
3243  while (!Worklist.empty()) {
3244  const SDep &Cur = Worklist.pop_back_val();
3245  SUnit *PrevSU = Cur.getSUnit();
3246  if (Visited.count(PrevSU))
3247  continue;
3248  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
3249  if (it == InstrToCycle.end())
3250  continue;
3251  EarlyCycle = std::min(EarlyCycle, it->second);
3252  for (const auto &PI : PrevSU->Preds)
3253  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3254  Worklist.push_back(PI);
3255  Visited.insert(PrevSU);
3256  }
3257  return EarlyCycle;
3258 }
3259 
3260 // Return the cycle of the latest scheduled instruction in the chain.
3262  SmallPtrSet<SUnit *, 8> Visited;
3263  SmallVector<SDep, 8> Worklist;
3264  Worklist.push_back(Dep);
3265  int LateCycle = INT_MIN;
3266  while (!Worklist.empty()) {
3267  const SDep &Cur = Worklist.pop_back_val();
3268  SUnit *SuccSU = Cur.getSUnit();
3269  if (Visited.count(SuccSU))
3270  continue;
3271  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
3272  if (it == InstrToCycle.end())
3273  continue;
3274  LateCycle = std::max(LateCycle, it->second);
3275  for (const auto &SI : SuccSU->Succs)
3276  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
3277  Worklist.push_back(SI);
3278  Visited.insert(SuccSU);
3279  }
3280  return LateCycle;
3281 }
3282 
3283 /// If an instruction has a use that spans multiple iterations, then
3284 /// return true. These instructions are characterized by having a back-ege
3285 /// to a Phi, which contains a reference to another Phi.
3287  for (auto &P : SU->Preds)
3288  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
3289  for (auto &S : P.getSUnit()->Succs)
3290  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
3291  return P.getSUnit();
3292  return nullptr;
3293 }
3294 
3295 /// Compute the scheduling start slot for the instruction. The start slot
3296 /// depends on any predecessor or successor nodes scheduled already.
3297 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
3298  int *MinEnd, int *MaxStart, int II,
3299  SwingSchedulerDAG *DAG) {
3300  // Iterate over each instruction that has been scheduled already. The start
3301  // slot computation depends on whether the previously scheduled instruction
3302  // is a predecessor or successor of the specified instruction.
3303  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
3304 
3305  // Iterate over each instruction in the current cycle.
3306  for (SUnit *I : getInstructions(cycle)) {
3307  // Because we're processing a DAG for the dependences, we recognize
3308  // the back-edge in recurrences by anti dependences.
3309  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
3310  const SDep &Dep = SU->Preds[i];
3311  if (Dep.getSUnit() == I) {
3312  if (!DAG->isBackedge(SU, Dep)) {
3313  int EarlyStart = cycle + Dep.getLatency() -
3314  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3315  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3316  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
3317  int End = earliestCycleInChain(Dep) + (II - 1);
3318  *MinEnd = std::min(*MinEnd, End);
3319  }
3320  } else {
3321  int LateStart = cycle - Dep.getLatency() +
3322  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3323  *MinLateStart = std::min(*MinLateStart, LateStart);
3324  }
3325  }
3326  // For instruction that requires multiple iterations, make sure that
3327  // the dependent instruction is not scheduled past the definition.
3328  SUnit *BE = multipleIterations(I, DAG);
3329  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
3330  !SU->isPred(I))
3331  *MinLateStart = std::min(*MinLateStart, cycle);
3332  }
3333  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
3334  if (SU->Succs[i].getSUnit() == I) {
3335  const SDep &Dep = SU->Succs[i];
3336  if (!DAG->isBackedge(SU, Dep)) {
3337  int LateStart = cycle - Dep.getLatency() +
3338  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
3339  *MinLateStart = std::min(*MinLateStart, LateStart);
3340  if (DAG->isLoopCarriedDep(SU, Dep)) {
3341  int Start = latestCycleInChain(Dep) + 1 - II;
3342  *MaxStart = std::max(*MaxStart, Start);
3343  }
3344  } else {
3345  int EarlyStart = cycle + Dep.getLatency() -
3346  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
3347  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
3348  }
3349  }
3350  }
3351  }
3352  }
3353 }
3354 
3355 /// Order the instructions within a cycle so that the definitions occur
3356 /// before the uses. Returns true if the instruction is added to the start
3357 /// of the list, or false if added to the end.
3359  std::deque<SUnit *> &Insts) {
3360  MachineInstr *MI = SU->getInstr();
3361  bool OrderBeforeUse = false;
3362  bool OrderAfterDef = false;
3363  bool OrderBeforeDef = false;
3364  unsigned MoveDef = 0;
3365  unsigned MoveUse = 0;
3366  int StageInst1 = stageScheduled(SU);
3367 
3368  unsigned Pos = 0;
3369  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
3370  ++I, ++Pos) {
3371  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3372  MachineOperand &MO = MI->getOperand(i);
3374  continue;
3375 
3376  unsigned Reg = MO.getReg();
3377  unsigned BasePos, OffsetPos;
3378  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
3379  if (MI->getOperand(BasePos).getReg() == Reg)
3380  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
3381  Reg = NewReg;
3382  bool Reads, Writes;
3383  std::tie(Reads, Writes) =
3384  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
3385  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
3386  OrderBeforeUse = true;
3387  if (MoveUse == 0)
3388  MoveUse = Pos;
3389  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
3390  // Add the instruction after the scheduled instruction.
3391  OrderAfterDef = true;
3392  MoveDef = Pos;
3393  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
3394  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
3395  OrderBeforeUse = true;
3396  if (MoveUse == 0)
3397  MoveUse = Pos;
3398  } else {
3399  OrderAfterDef = true;
3400  MoveDef = Pos;
3401  }
3402  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
3403  OrderBeforeUse = true;
3404  if (MoveUse == 0)
3405  MoveUse = Pos;
3406  if (MoveUse != 0) {
3407  OrderAfterDef = true;
3408  MoveDef = Pos - 1;
3409  }
3410  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
3411  // Add the instruction before the scheduled instruction.
3412  OrderBeforeUse = true;
3413  if (MoveUse == 0)
3414  MoveUse = Pos;
3415  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
3416  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
3417  if (MoveUse == 0) {
3418  OrderBeforeDef = true;
3419  MoveUse = Pos;
3420  }
3421  }
3422  }
3423  // Check for order dependences between instructions. Make sure the source
3424  // is ordered before the destination.
3425  for (auto &S : SU->Succs) {
3426  if (S.getSUnit() != *I)
3427  continue;
3428  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3429  OrderBeforeUse = true;
3430  if (Pos < MoveUse)
3431  MoveUse = Pos;
3432  }
3433  }
3434  for (auto &P : SU->Preds) {
3435  if (P.getSUnit() != *I)
3436  continue;
3437  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
3438  OrderAfterDef = true;
3439  MoveDef = Pos;
3440  }
3441  }
3442  }
3443 
3444  // A circular dependence.
3445  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
3446  OrderBeforeUse = false;
3447 
3448  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
3449  // to a loop-carried dependence.
3450  if (OrderBeforeDef)
3451  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
3452 
3453  // The uncommon case when the instruction order needs to be updated because
3454  // there is both a use and def.
3455  if (OrderBeforeUse && OrderAfterDef) {
3456  SUnit *UseSU = Insts.at(MoveUse);
3457  SUnit *DefSU = Insts.at(MoveDef);
3458  if (MoveUse > MoveDef) {
3459  Insts.erase(Insts.begin() + MoveUse);
3460  Insts.erase(Insts.begin() + MoveDef);
3461  } else {
3462  Insts.erase(Insts.begin() + MoveDef);
3463  Insts.erase(Insts.begin() + MoveUse);
3464  }
3465  orderDependence(SSD, UseSU, Insts);
3466  orderDependence(SSD, SU, Insts);
3467  orderDependence(SSD, DefSU, Insts);
3468  return;
3469  }
3470  // Put the new instruction first if there is a use in the list. Otherwise,
3471  // put it at the end of the list.
3472  if (OrderBeforeUse)
3473  Insts.push_front(SU);
3474  else
3475  Insts.push_back(SU);
3476 }
3477 
3478 /// Return true if the scheduled Phi has a loop carried operand.
3480  if (!Phi.isPHI())
3481  return false;
3482  assert(Phi.isPHI() && "Expecting a Phi.");
3483  SUnit *DefSU = SSD->getSUnit(&Phi);
3484  unsigned DefCycle = cycleScheduled(DefSU);
3485  int DefStage = stageScheduled(DefSU);
3486 
3487  unsigned InitVal = 0;
3488  unsigned LoopVal = 0;
3489  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
3490  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
3491  if (!UseSU)
3492  return true;
3493  if (UseSU->getInstr()->isPHI())
3494  return true;
3495  unsigned LoopCycle = cycleScheduled(UseSU);
3496  int LoopStage = stageScheduled(UseSU);
3497  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
3498 }
3499 
3500 /// Return true if the instruction is a definition that is loop carried
3501 /// and defines the use on the next iteration.
3502 /// v1 = phi(v2, v3)
3503 /// (Def) v3 = op v1
3504 /// (MO) = v1
3505 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
3506 /// register.
3509  if (!MO.isReg())
3510  return false;
3511  if (Def->isPHI())
3512  return false;
3513  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
3514  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
3515  return false;
3516  if (!isLoopCarried(SSD, *Phi))
3517  return false;
3518  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
3519  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
3520  MachineOperand &DMO = Def->getOperand(i);
3521  if (!DMO.isReg() || !DMO.isDef())
3522  continue;
3523  if (DMO.getReg() == LoopReg)
3524  return true;
3525  }
3526  return false;
3527 }
3528 
3529 // Check if the generated schedule is valid. This function checks if
3530 // an instruction that uses a physical register is scheduled in a
3531 // different stage than the definition. The pipeliner does not handle
3532 // physical register values that may cross a basic block boundary.
3534  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
3535  SUnit &SU = SSD->SUnits[i];
3536  if (!SU.hasPhysRegDefs)
3537  continue;
3538  int StageDef = stageScheduled(&SU);
3539  assert(StageDef != -1 && "Instruction should have been scheduled.");
3540  for (auto &SI : SU.Succs)
3541  if (SI.isAssignedRegDep())
3542  if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
3543  if (stageScheduled(SI.getSUnit()) != StageDef)
3544  return false;
3545  }
3546  return true;
3547 }
3548 
3549 /// A property of the node order in swing-modulo-scheduling is
3550 /// that for nodes outside circuits the following holds:
3551 /// none of them is scheduled after both a successor and a
3552 /// predecessor.
3553 /// The method below checks whether the property is met.
3554 /// If not, debug information is printed and statistics information updated.
3555 /// Note that we do not use an assert statement.
3556 /// The reason is that although an invalid node oder may prevent
3557 /// the pipeliner from finding a pipelined schedule for arbitrary II,
3558 /// it does not lead to the generation of incorrect code.
3559 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
3560 
3561  // a sorted vector that maps each SUnit to its index in the NodeOrder
3562  typedef std::pair<SUnit *, unsigned> UnitIndex;
3563  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
3564 
3565  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
3566  Indices.push_back(std::make_pair(NodeOrder[i], i));
3567 
3568  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
3569  return std::get<0>(i1) < std::get<0>(i2);
3570  };
3571 
3572  // sort, so that we can perform a binary search
3573  llvm::sort(Indices, CompareKey);
3574 
3575  bool Valid = true;
3576  (void)Valid;
3577  // for each SUnit in the NodeOrder, check whether
3578  // it appears after both a successor and a predecessor
3579  // of the SUnit. If this is the case, and the SUnit
3580  // is not part of circuit, then the NodeOrder is not
3581  // valid.
3582  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
3583  SUnit *SU = NodeOrder[i];
3584  unsigned Index = i;
3585 
3586  bool PredBefore = false;
3587  bool SuccBefore = false;
3588 
3589  SUnit *Succ;
3590  SUnit *Pred;
3591  (void)Succ;
3592  (void)Pred;
3593 
3594  for (SDep &PredEdge : SU->Preds) {
3595  SUnit *PredSU = PredEdge.getSUnit();
3596  unsigned PredIndex =
3597  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3598  std::make_pair(PredSU, 0), CompareKey));
3599  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
3600  PredBefore = true;
3601  Pred = PredSU;
3602  break;
3603  }
3604  }
3605 
3606  for (SDep &SuccEdge : SU->Succs) {
3607  SUnit *SuccSU = SuccEdge.getSUnit();
3608  unsigned SuccIndex =
3609  std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
3610  std::make_pair(SuccSU, 0), CompareKey));
3611  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
3612  SuccBefore = true;
3613  Succ = SuccSU;
3614  break;
3615  }
3616  }
3617 
3618  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
3619  // instructions in circuits are allowed to be scheduled
3620  // after both a successor and predecessor.
3621  bool InCircuit = std::any_of(
3622  Circuits.begin(), Circuits.end(),
3623  [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
3624  if (InCircuit)
3625  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
3626  else {
3627  Valid = false;
3628  NumNodeOrderIssues++;
3629  LLVM_DEBUG(dbgs() << "Predecessor ";);
3630  }
3631  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
3632  << " are scheduled before node " << SU->NodeNum
3633  << "\n";);
3634  }
3635  }
3636 
3637  LLVM_DEBUG({
3638  if (!Valid)
3639  dbgs() << "Invalid node order found!\n";
3640  });
3641 }
3642 
3643 /// Attempt to fix the degenerate cases when the instruction serialization
3644 /// causes the register lifetimes to overlap. For example,
3645 /// p' = store_pi(p, b)
3646 /// = load p, offset
3647 /// In this case p and p' overlap, which means that two registers are needed.
3648 /// Instead, this function changes the load to use p' and updates the offset.
3649 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
3650  unsigned OverlapReg = 0;
3651  unsigned NewBaseReg = 0;
3652  for (SUnit *SU : Instrs) {
3653  MachineInstr *MI = SU->getInstr();
3654  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3655  const MachineOperand &MO = MI->getOperand(i);
3656  // Look for an instruction that uses p. The instruction occurs in the
3657  // same cycle but occurs later in the serialized order.
3658  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
3659  // Check that the instruction appears in the InstrChanges structure,
3660  // which contains instructions that can have the offset updated.
3662  InstrChanges.find(SU);
3663  if (It != InstrChanges.end()) {
3664  unsigned BasePos, OffsetPos;
3665  // Update the base register and adjust the offset.
3666  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
3667  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
3668  NewMI->getOperand(BasePos).setReg(NewBaseReg);
3669  int64_t NewOffset =
3670  MI->getOperand(OffsetPos).getImm() - It->second.second;
3671  NewMI->getOperand(OffsetPos).setImm(NewOffset);
3672  SU->setInstr(NewMI);
3673  MISUnitMap[NewMI] = SU;
3674  NewMIs.insert(NewMI);
3675  }
3676  }
3677  OverlapReg = 0;
3678  NewBaseReg = 0;
3679  break;
3680  }
3681  // Look for an instruction of the form p' = op(p), which uses and defines
3682  // two virtual registers that get allocated to the same physical register.
3683  unsigned TiedUseIdx = 0;
3684  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
3685  // OverlapReg is p in the example above.
3686  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
3687  // NewBaseReg is p' in the example above.
3688  NewBaseReg = MI->getOperand(i).getReg();
3689  break;
3690  }
3691  }
3692  }
3693 }
3694 
3695 /// After the schedule has been formed, call this function to combine
3696 /// the instructions from the different stages/cycles. That is, this
3697 /// function creates a schedule that represents a single iteration.
3699  // Move all instructions to the first stage from later stages.
3700  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3701  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
3702  ++stage) {
3703  std::deque<SUnit *> &cycleInstrs =
3704  ScheduledInstrs[cycle + (stage * InitiationInterval)];
3705  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
3706  E = cycleInstrs.rend();
3707  I != E; ++I)
3708  ScheduledInstrs[cycle].push_front(*I);
3709  }
3710  }
3711  // Iterate over the definitions in each instruction, and compute the
3712  // stage difference for each use. Keep the maximum value.
3713  for (auto &I : InstrToCycle) {
3714  int DefStage = stageScheduled(I.first);
3715  MachineInstr *MI = I.first->getInstr();
3716  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
3717  MachineOperand &Op = MI->getOperand(i);
3718  if (!Op.isReg() || !Op.isDef())
3719  continue;
3720 
3721  unsigned Reg = Op.getReg();
3722  unsigned MaxDiff = 0;
3723  bool PhiIsSwapped = false;
3724  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
3725  EI = MRI.use_end();
3726  UI != EI; ++UI) {
3727  MachineOperand &UseOp = *UI;
3728  MachineInstr *UseMI = UseOp.getParent();
3729  SUnit *SUnitUse = SSD->getSUnit(UseMI);
3730  int UseStage = stageScheduled(SUnitUse);
3731  unsigned Diff = 0;
3732  if (UseStage != -1 && UseStage >= DefStage)
3733  Diff = UseStage - DefStage;
3734  if (MI->isPHI()) {
3735  if (isLoopCarried(SSD, *MI))
3736  ++Diff;
3737  else
3738  PhiIsSwapped = true;
3739  }
3740  MaxDiff = std::max(Diff, MaxDiff);
3741  }
3742  RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
3743  }
3744  }
3745 
3746  // Erase all the elements in the later stages. Only one iteration should
3747  // remain in the scheduled list, and it contains all the instructions.
3748  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
3749  ScheduledInstrs.erase(cycle);
3750 
3751  // Change the registers in instruction as specified in the InstrChanges
3752  // map. We need to use the new registers to create the correct order.
3753  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
3754  SUnit *SU = &SSD->SUnits[i];
3755  SSD->applyInstrChange(SU->getInstr(), *this);
3756  }
3757 
3758  // Reorder the instructions in each cycle to fix and improve the
3759  // generated code.
3760  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
3761  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
3762  std::deque<SUnit *> newOrderPhi;
3763  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3764  SUnit *SU = cycleInstrs[i];
3765  if (SU->getInstr()->isPHI())
3766  newOrderPhi.push_back(SU);
3767  }
3768  std::deque<SUnit *> newOrderI;
3769  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
3770  SUnit *SU = cycleInstrs[i];
3771  if (!SU->getInstr()->isPHI())
3772  orderDependence(SSD, SU, newOrderI);
3773  }
3774  // Replace the old order with the new order.
3775  cycleInstrs.swap(newOrderPhi);
3776  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
3777  SSD->fixupRegisterOverlaps(cycleInstrs);
3778  }
3779 
3780  LLVM_DEBUG(dump(););
3781 }
3782 
3783 void NodeSet::print(raw_ostream &os) const {
3784  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
3785  << " depth " << MaxDepth << " col " << Colocate << "\n";
3786  for (const auto &I : Nodes)
3787  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
3788  os << "\n";
3789 }
3790 
3791 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3792 /// Print the schedule information to the given output.
3794  // Iterate over each cycle.
3795  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3796  // Iterate over each instruction in the cycle.
3797  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3798  for (SUnit *CI : cycleInstrs->second) {
3799  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3800  os << "(" << CI->NodeNum << ") ";
3801  CI->getInstr()->print(os);
3802  os << "\n";
3803  }
3804  }
3805 }
3806 
3807 /// Utility function used for debugging to print the schedule.
3810 
3811 #endif
3812 
3813 
3814 
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:750
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:453
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void clear()
Definition: MapVector.h:88
instr_iterator instr_begin()
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:632
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
const TargetRegisterClass * getRegClass(unsigned Reg) const
Return the register class of the specified virtual register.
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:266
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:464
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
bool empty() const
cl::opt< bool > SwpEnableCopyToPhi
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:382
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
bool isValidSchedule(SwingSchedulerDAG *SSD)
static void getUnderlyingObjects(MachineInstr *MI, SmallVectorImpl< Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
const MachineLoopInfo * MLI
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
unsigned getSubReg() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:173
bool isInlineAsm() const
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
The two locations do not alias at all.
Definition: AliasAnalysis.h:83
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const
Insert branch code into the end of the specified MachineBasicBlock.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
block Block Frequency true
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:458
bool isPHI() const
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar, MachineInstr &Cmp, SmallVectorImpl< MachineOperand > &Cond, SmallVectorImpl< MachineInstr *> &PrevInsts, unsigned Iter, unsigned MaxIter) const
Generate code to reduce the loop iteration by one and check if the loop is finished.
virtual unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const
Remove the branching code at the end of the specific MBB.
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:260
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
virtual bool getMemOperandWithOffset(MachineInstr &MI, MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void eraseFromParent()
This method unlinks &#39;this&#39; from the containing function and deletes it.
A description of a memory reference used in the backend.
static use_iterator use_end()
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:411
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
Modulo Software Pipelining
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
void eraseFromParent()
Unlink &#39;this&#39; from the containing basic block and delete it.
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1186
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:99
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
defusechain_iterator - This class provides iterator support for machine operands in the function that...
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:405
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
SlotIndexes pass.
Definition: SlotIndexes.h:328
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void dump() const
Definition: Pass.cpp:134
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:234
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Itinerary data supplied by a subtarget to be used by a target.
auto lower_bound(R &&Range, ForwardIt I) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1281
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
unsigned count(SUnit *SU) const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
virtual bool areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn&#39;t ...
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1251
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:77
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
StringRef getString() const
Definition: Metadata.cpp:463
static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming)
Remove the incoming block from the Phis in a basic block.
Scheduling dependency.
Definition: ScheduleDAG.h:49
static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:580
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:576
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:819
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn&#39;t necessary for c...
Definition: ScheduleDAG.h:200
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
bool hasInterval(unsigned Reg) const
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:515
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
MachineInstrBuilder & UseMI
unsigned getStagesForPhi(int Reg)
The number of stages for a Phi is a little different than other instructions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
void setMBB(MachineBasicBlock *MBB)
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:548
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high...
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:192
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
LLVM_DUMP_METHOD void dump() const
bool isScheduledAtStage(SUnit *SU, unsigned StageNum)
Return true if the instruction is scheduled at the specified stage.
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1424
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Any other ordering dependency.
Definition: ScheduleDAG.h:56
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
hexagon gen pred
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI, LiveIntervals &LIS)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
Iterator for intrusive lists based on ilist_node.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
reverse_instr_iterator instr_rbegin()
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
static unsigned getDepth(const SmallVectorImpl< const MachineBasicBlock *> &Stack, const MachineBasicBlock *MBB)
MachineInstrBuilder MachineInstrBuilder & DefMI
bool canReserveResources(const MCInstrDesc *MID)
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
reverse_instr_iterator instr_rend()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:743
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
This class represents the scheduled code.
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
unsigned getStagesForReg(int Reg, unsigned CurStage)
Return the max.
LiveInterval & createEmptyInterval(unsigned Reg)
Interval creation.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:63
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:162
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understoo.
iterator begin() const
Definition: SmallPtrSet.h:396
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:148
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
void setReg(unsigned Reg)
Change the register this operand corresponds to.
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
void push_back(MachineInstr *MI)
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don&#39;t consume any machine resources in their current form...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineInstrBuilder & addReg(unsigned RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
iterator end() const
Definition: SmallPtrSet.h:401
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:211
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
void GetUnderlyingObjects(Value *V, SmallVectorImpl< Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:806
bool memoperands_empty() const
Return true if we don&#39;t have any memory operands which described the memory access done by this instr...
Definition: MachineInstr.h:545
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
void insert(iterator MBBI, MachineBasicBlock *MBB)
void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand *> MemRefs)
Assign this MachineInstr&#39;s memory reference descriptor list.
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:72
mop_iterator operands_begin()
Definition: MachineInstr.h:452
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
A single uniqued string.
Definition: Metadata.h:603
void setPos(MachineBasicBlock::const_iterator Pos)
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
std::vector< MachineBasicBlock * >::iterator succ_iterator
unsigned createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
void reserveResources(const MCInstrDesc *MID)
void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431