MachinePipeliner.cpp
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and to check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
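//
// Illustrative sketch (not part of the implementation): with two stages S0 and
// S1 of the original loop body, the generated code overlaps consecutive
// iterations roughly as
//
//   prolog:       S0(iteration 0)
//   kernel loop:  S1(iteration i), S0(iteration i+1)   ; one kernel iteration
//                                                      ; every II cycles
//   epilog:       S1(last iteration)
//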
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetVector.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/SmallSet.h"
40 #include "llvm/ADT/SmallVector.h"
41 #include "llvm/ADT/Statistic.h"
66 #include "llvm/Config/llvm-config.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/DebugLoc.h"
69 #include "llvm/IR/Function.h"
70 #include "llvm/MC/LaneBitmask.h"
71 #include "llvm/MC/MCInstrDesc.h"
73 #include "llvm/MC/MCRegisterInfo.h"
74 #include "llvm/Pass.h"
76 #include "llvm/Support/Compiler.h"
77 #include "llvm/Support/Debug.h"
80 #include <algorithm>
81 #include <cassert>
82 #include <climits>
83 #include <cstdint>
84 #include <deque>
85 #include <functional>
86 #include <iterator>
87 #include <map>
88 #include <memory>
89 #include <tuple>
90 #include <utility>
91 #include <vector>
92 
93 using namespace llvm;
94 
95 #define DEBUG_TYPE "pipeliner"
96 
97 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
98 STATISTIC(NumPipelined, "Number of loops software pipelined");
99 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
100 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
101 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
102 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
103 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
104 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
105 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
106 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
107 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
108 
109 /// A command line option to turn software pipelining on or off.
110 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
112  cl::desc("Enable Software Pipelining"));
113 
114 /// A command line option to enable SWP at -Os.
115 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
116  cl::desc("Enable SWP at Os."), cl::Hidden,
117  cl::init(false));
118 
119 /// A command line argument to limit the minimum initiation interval for pipelining.
120 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
121  cl::desc("Size limit for the MII."),
122  cl::Hidden, cl::init(27));
123 
124 /// A command line argument to limit the number of stages in the pipeline.
125 static cl::opt<int>
126  SwpMaxStages("pipeliner-max-stages",
127  cl::desc("Maximum stages allowed in the generated schedule."),
128  cl::Hidden, cl::init(3));
129 
130 /// A command line option to disable the pruning of chain dependences due to
131 /// an unrelated Phi.
132 static cl::opt<bool>
133  SwpPruneDeps("pipeliner-prune-deps",
134  cl::desc("Prune dependences between unrelated Phi nodes."),
135  cl::Hidden, cl::init(true));
136 
137 /// A command line option to disable the pruning of loop carried order
138 /// dependences.
139 static cl::opt<bool>
140  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
141  cl::desc("Prune loop carried order dependences."),
142  cl::Hidden, cl::init(true));
143 
144 #ifndef NDEBUG
145 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
146 #endif
147 
148 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
149  cl::ReallyHidden, cl::init(false),
150  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
151 
152 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
153  cl::init(false));
154 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
155  cl::init(false));
156 
157 static cl::opt<bool> EmitTestAnnotations(
158  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
159  cl::desc("Instead of emitting the pipelined code, annotate instructions "
160  "with the generated schedule for feeding into the "
161  "-modulo-schedule-test pass"));
162 
163 static cl::opt<bool> ExperimentalCodeGen(
164  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
165  cl::desc(
166  "Use the experimental peeling code generator for software pipelining"));
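// Illustrative usage (an assumption, not from this file): the options above are
// ordinary cl::opt flags, so they can be passed on the llc command line of a
// target that implements the pipelining hooks, e.g.
//   llc -enable-pipeliner -pipeliner-max-mii=20 -pipeliner-max-stages=3 foo.ll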
167 
168 namespace llvm {
169 
170 // A command line option to enable the CopyToPhi DAG mutation.
171 cl::opt<bool>
172  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
173  cl::init(true), cl::ZeroOrMore,
174  cl::desc("Enable CopyToPhi DAG Mutation"));
175 
176 } // end namespace llvm
177 
178 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
179 char MachinePipeliner::ID = 0;
180 #ifndef NDEBUG
181 int MachinePipeliner::NumTries = 0;
182 #endif
183 char &llvm::MachinePipelinerID = MachinePipeliner::ID;
184 
185 INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE,
186  "Modulo Software Pipelining", false, false)
187 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
188 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
189 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
190 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
191 INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE,
192  "Modulo Software Pipelining", false, false)
193 
194 /// The "main" function for implementing Swing Modulo Scheduling.
195 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
196  if (skipFunction(mf.getFunction()))
197  return false;
198 
199  if (!EnableSWP)
200  return false;
201 
202  if (mf.getFunction().getAttributes().hasAttribute(
203  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
204  !EnableSWPOptSize.getPosition())
205  return false;
206 
207  if (!mf.getSubtarget().enableMachinePipeliner())
208  return false;
209 
210  // Cannot pipeline loops without instruction itineraries if we are using
211  // DFA for the pipeliner.
212  if (mf.getSubtarget().useDFAforSMS() &&
213  (!mf.getSubtarget().getInstrItineraryData() ||
214  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
215  return false;
216 
217  MF = &mf;
218  MLI = &getAnalysis<MachineLoopInfo>();
219  MDT = &getAnalysis<MachineDominatorTree>();
220  TII = MF->getSubtarget().getInstrInfo();
221  RegClassInfo.runOnMachineFunction(*MF);
222 
223  for (auto &L : *MLI)
224  scheduleLoop(*L);
225 
226  return false;
227 }
228 
229 /// Attempt to perform the SMS algorithm on the specified loop. This function is
230 /// the main entry point for the algorithm. The function identifies candidate
231 /// loops, calculates the minimum initiation interval, and attempts to schedule
232 /// the loop.
233 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
234  bool Changed = false;
235  for (auto &InnerLoop : L)
236  Changed |= scheduleLoop(*InnerLoop);
237 
238 #ifndef NDEBUG
239  // Stop trying after reaching the limit (if any).
240  int Limit = SwpLoopLimit;
241  if (Limit >= 0) {
242  if (NumTries >= SwpLoopLimit)
243  return Changed;
244  NumTries++;
245  }
246 #endif
247 
248  setPragmaPipelineOptions(L);
249  if (!canPipelineLoop(L)) {
250  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
251  return Changed;
252  }
253 
254  ++NumTrytoPipeline;
255 
256  Changed = swingModuloScheduler(L);
257 
258  return Changed;
259 }
260 
261 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
262  MachineBasicBlock *LBLK = L.getTopBlock();
263 
264  if (LBLK == nullptr)
265  return;
266 
267  const BasicBlock *BBLK = LBLK->getBasicBlock();
268  if (BBLK == nullptr)
269  return;
270 
271  const Instruction *TI = BBLK->getTerminator();
272  if (TI == nullptr)
273  return;
274 
275  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
276  if (LoopID == nullptr)
277  return;
278 
279  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
280  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
281 
282  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
283  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
284 
285  if (MD == nullptr)
286  continue;
287 
288  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
289 
290  if (S == nullptr)
291  continue;
292 
293  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
294  assert(MD->getNumOperands() == 2 &&
295  "Pipeline initiation interval hint metadata should have two operands.");
296  II_setByPragma =
297  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
298  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
299  } else if (S->getString() == "llvm.loop.pipeline.disable") {
300  disabledByPragma = true;
301  }
302  }
303 }
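// Illustrative sketch (assumed IR shape, not taken from this file): the loop
// metadata walked above typically looks like
//
//   br i1 %cond, label %loop, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.pipeline.initiationinterval", i32 4}
//   !2 = !{!"llvm.loop.pipeline.disable"}
//
// Here !1 sets II_setByPragma to 4 and !2 sets disabledByPragma (normally only
// one of the two hints would be present).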
304 
305 /// Return true if the loop can be software pipelined. The algorithm is
306 /// restricted to loops with a single basic block. Make sure that the
307 /// branch in the loop can be analyzed.
308 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
309  if (L.getNumBlocks() != 1)
310  return false;
311 
312  if (disabledByPragma)
313  return false;
314 
315  // Check whether the branch can be analyzed; if it can't be understood, we
316  // can't pipeline the loop.
317  LI.TBB = nullptr;
318  LI.FBB = nullptr;
319  LI.BrCond.clear();
320  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
321  LLVM_DEBUG(
322  dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
323  NumFailBranch++;
324  return false;
325  }
326 
327  LI.LoopInductionVar = nullptr;
328  LI.LoopCompare = nullptr;
329  if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
330  LLVM_DEBUG(
331  dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
332  NumFailLoop++;
333  return false;
334  }
335 
336  if (!L.getLoopPreheader()) {
337  LLVM_DEBUG(
338  dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
339  NumFailPreheader++;
340  return false;
341  }
342 
343  // Remove any subregisters from inputs to phi nodes.
344  preprocessPhiNodes(*L.getHeader());
345  return true;
346 }
347 
348 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
349  MachineRegisterInfo &MRI = MF->getRegInfo();
350  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
351 
352  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
353  MachineOperand &DefOp = PI.getOperand(0);
354  assert(DefOp.getSubReg() == 0);
355  auto *RC = MRI.getRegClass(DefOp.getReg());
356 
357  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
358  MachineOperand &RegOp = PI.getOperand(i);
359  if (RegOp.getSubReg() == 0)
360  continue;
361 
362  // If the operand uses a subregister, replace it with a new register
363  // without subregisters, and generate a copy to the new register.
364  Register NewReg = MRI.createVirtualRegister(RC);
365  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
366  MachineBasicBlock::iterator At = PredB.getFirstTerminator();
367  const DebugLoc &DL = PredB.findDebugLoc(At);
368  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
369  .addReg(RegOp.getReg(), getRegState(RegOp),
370  RegOp.getSubReg());
371  Slots.insertMachineInstrInMaps(*Copy);
372  RegOp.setReg(NewReg);
373  RegOp.setSubReg(0);
374  }
375  }
376 }
377 
378 /// The SMS algorithm consists of the following main steps:
379 /// 1. Computation and analysis of the dependence graph.
380 /// 2. Ordering of the nodes (instructions).
381 /// 3. Attempt to Schedule the loop.
382 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
383  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
384 
385  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
386  II_setByPragma);
387 
388  MachineBasicBlock *MBB = L.getHeader();
389  // The kernel should not include any terminator instructions. These
390  // will be added back later.
391  SMS.startBlock(MBB);
392 
393  // Compute the number of 'real' instructions in the basic block by
394  // ignoring terminators.
395  unsigned size = MBB->size();
396  for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
397  E = MBB->instr_end();
398  I != E; ++I, --size)
399  ;
400 
401  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
402  SMS.schedule();
403  SMS.exitRegion();
404 
405  SMS.finishBlock();
406  return SMS.hasNewSchedule();
407 }
408 
409 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
410  if (II_setByPragma > 0)
411  MII = II_setByPragma;
412  else
413  MII = std::max(ResMII, RecMII);
414 }
415 
416 void SwingSchedulerDAG::setMAX_II() {
417  if (II_setByPragma > 0)
418  MAX_II = II_setByPragma;
419  else
420  MAX_II = MII + 10;
421 }
422 
423 /// We override the schedule function in ScheduleDAGInstrs to implement the
424 /// scheduling part of the Swing Modulo Scheduling algorithm.
425 void SwingSchedulerDAG::schedule() {
426  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
427  buildSchedGraph(AA);
428  addLoopCarriedDependences(AA);
429  updatePhiDependences();
430  Topo.InitDAGTopologicalSorting();
431  changeDependences();
432  postprocessDAG();
433  LLVM_DEBUG(dump());
434 
435  NodeSetType NodeSets;
436  findCircuits(NodeSets);
437  NodeSetType Circuits = NodeSets;
438 
439  // Calculate the MII.
440  unsigned ResMII = calculateResMII();
441  unsigned RecMII = calculateRecMII(NodeSets);
442 
443  fuseRecs(NodeSets);
444 
445  // This flag is used for testing and can cause correctness problems.
446  if (SwpIgnoreRecMII)
447  RecMII = 0;
448 
449  setMII(ResMII, RecMII);
450  setMAX_II();
451 
452  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
453  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
454 
455  // Can't schedule a loop without a valid MII.
456  if (MII == 0) {
457  LLVM_DEBUG(
458  dbgs()
459  << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
460  NumFailZeroMII++;
461  return;
462  }
463 
464  // Don't pipeline large loops.
465  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
466  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
467  << ", we don't pipeline large loops\n");
468  NumFailLargeMaxMII++;
469  return;
470  }
471 
472  computeNodeFunctions(NodeSets);
473 
474  registerPressureFilter(NodeSets);
475 
476  colocateNodeSets(NodeSets);
477 
478  checkNodeSets(NodeSets);
479 
480  LLVM_DEBUG({
481  for (auto &I : NodeSets) {
482  dbgs() << " Rec NodeSet ";
483  I.dump();
484  }
485  });
486 
487  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
488 
489  groupRemainingNodes(NodeSets);
490 
491  removeDuplicateNodes(NodeSets);
492 
493  LLVM_DEBUG({
494  for (auto &I : NodeSets) {
495  dbgs() << " NodeSet ";
496  I.dump();
497  }
498  });
499 
500  computeNodeOrder(NodeSets);
501 
502  // check for node order issues
503  checkValidNodeOrder(Circuits);
504 
505  SMSchedule Schedule(Pass.MF);
506  Scheduled = schedulePipeline(Schedule);
507 
508  if (!Scheduled){
509  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
510  NumFailNoSchedule++;
511  return;
512  }
513 
514  unsigned numStages = Schedule.getMaxStageCount();
515  // No need to generate pipeline if there are no overlapped iterations.
516  if (numStages == 0) {
517  LLVM_DEBUG(
518  dbgs() << "No overlapped iterations, no need to generate pipeline\n");
519  NumFailZeroStage++;
520  return;
521  }
522  // Check that the maximum stage count is less than user-defined limit.
523  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
524  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
525  << " : too many stages, abort\n");
526  NumFailLargeMaxStage++;
527  return;
528  }
529 
530  // Generate the schedule as a ModuloSchedule.
531  DenseMap<MachineInstr *, int> Cycles, Stages;
532  std::vector<MachineInstr *> OrderedInsts;
533  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
534  ++Cycle) {
535  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
536  OrderedInsts.push_back(SU->getInstr());
537  Cycles[SU->getInstr()] = Cycle;
538  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
539  }
540  }
541  DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges;
542  for (auto &KV : NewMIs) {
543  Cycles[KV.first] = Cycles[KV.second];
544  Stages[KV.first] = Stages[KV.second];
545  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
546  }
547 
548  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
549  std::move(Stages));
550  if (EmitTestAnnotations) {
551  assert(NewInstrChanges.empty() &&
552  "Cannot serialize a schedule with InstrChanges!");
553  ModuloScheduleTestAnnotater MSTI(MF, MS);
554  MSTI.annotate();
555  return;
556  }
557  // The experimental code generator can't work if there are InstChanges.
558  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
559  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
560  // Experimental code generation isn't complete yet, but it can partially
561  // validate the code it generates against the original
562  // ModuloScheduleExpander.
564  } else {
565  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
566  MSE.expand();
567  MSE.cleanup();
568  }
569  ++NumPipelined;
570 }
571 
572 /// Clean up after the software pipeliner runs.
573 void SwingSchedulerDAG::finishBlock() {
574  for (auto &KV : NewMIs)
575  MF.DeleteMachineInstr(KV.second);
576  NewMIs.clear();
577 
578  // Call the superclass.
579  ScheduleDAGInstrs::finishBlock();
580 }
581 
582 /// Return the register values for the operands of a Phi instruction.
583 /// This function assumes the instruction is a Phi.
584 static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
585  unsigned &InitVal, unsigned &LoopVal) {
586  assert(Phi.isPHI() && "Expecting a Phi.");
587 
588  InitVal = 0;
589  LoopVal = 0;
590  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
591  if (Phi.getOperand(i + 1).getMBB() != Loop)
592  InitVal = Phi.getOperand(i).getReg();
593  else
594  LoopVal = Phi.getOperand(i).getReg();
595 
596  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
597 }
598 
599 /// Return the Phi register value that comes from the loop block.
600 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
601  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
602  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
603  return Phi.getOperand(i).getReg();
604  return 0;
605 }
606 
607 /// Return true if SUb can be reached from SUa following the chain edges.
608 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
609  SmallPtrSet<SUnit *, 8> Visited;
610  SmallVector<SUnit *, 8> Worklist;
611  Worklist.push_back(SUa);
612  while (!Worklist.empty()) {
613  const SUnit *SU = Worklist.pop_back_val();
614  for (auto &SI : SU->Succs) {
615  SUnit *SuccSU = SI.getSUnit();
616  if (SI.getKind() == SDep::Order) {
617  if (Visited.count(SuccSU))
618  continue;
619  if (SuccSU == SUb)
620  return true;
621  Worklist.push_back(SuccSU);
622  Visited.insert(SuccSU);
623  }
624  }
625  }
626  return false;
627 }
628 
629 /// Return true if the instruction causes a chain between memory
630 /// references before and after it.
631 static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
632  return MI.isCall() || MI.mayRaiseFPException() ||
633  MI.hasUnmodeledSideEffects() ||
634  (MI.hasOrderedMemoryRef() &&
635  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
636 }
637 
638 /// Return the underlying objects for the memory references of an instruction.
639 /// This function calls the code in ValueTracking, but first checks that the
640 /// instruction has a memory operand.
641 static void getUnderlyingObjects(const MachineInstr *MI,
642  SmallVectorImpl<const Value *> &Objs,
643  const DataLayout &DL) {
644  if (!MI->hasOneMemOperand())
645  return;
646  MachineMemOperand *MM = *MI->memoperands_begin();
647  if (!MM->getValue())
648  return;
649  GetUnderlyingObjects(MM->getValue(), Objs, DL);
650  for (const Value *V : Objs) {
651  if (!isIdentifiedObject(V)) {
652  Objs.clear();
653  return;
654  }
655  Objs.push_back(V);
656  }
657 }
658 
659 /// Add a chain edge between a load and store if the store can be an
660 /// alias of the load on a subsequent iteration, i.e., a loop carried
661 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
662 /// but that code doesn't create loop carried dependences.
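/// For example (illustrative): in a loop equivalent to
///   for (i = 0; i < n; ++i) a[i+1] = a[i] + 1;
/// the store to a[i+1] in iteration i may alias the load of a[i] in iteration
/// i+1, so a barrier edge with latency 1 is added from the load's SUnit to the
/// store's SUnit to record the loop carried dependence.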
663 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
664  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
665  Value *UnknownValue =
666  UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
667  for (auto &SU : SUnits) {
668  MachineInstr &MI = *SU.getInstr();
669  if (isDependenceBarrier(MI, AA))
670  PendingLoads.clear();
671  else if (MI.mayLoad()) {
672  SmallVector<const Value *, 4> Objs;
673  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
674  if (Objs.empty())
675  Objs.push_back(UnknownValue);
676  for (auto V : Objs) {
677  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
678  SUs.push_back(&SU);
679  }
680  } else if (MI.mayStore()) {
681  SmallVector<const Value *, 4> Objs;
682  getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
683  if (Objs.empty())
684  Objs.push_back(UnknownValue);
685  for (auto V : Objs) {
687  PendingLoads.find(V);
688  if (I == PendingLoads.end())
689  continue;
690  for (auto Load : I->second) {
691  if (isSuccOrder(Load, &SU))
692  continue;
693  MachineInstr &LdMI = *Load->getInstr();
694  // First, perform the cheaper check that compares the base register.
695  // If they are the same and the load offset is less than the store
696  // offset, then mark the dependence as loop carried potentially.
697  const MachineOperand *BaseOp1, *BaseOp2;
698  int64_t Offset1, Offset2;
699  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
700  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
701  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
702  (int)Offset1 < (int)Offset2) {
703  assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
704  "What happened to the chain edge?");
705  SDep Dep(Load, SDep::Barrier);
706  Dep.setLatency(1);
707  SU.addPred(Dep);
708  continue;
709  }
710  }
711  // Second, the more expensive check that uses alias analysis on the
712  // base registers. If they alias, and the load offset is less than
713  // the store offset, then mark the dependence as loop carried.
714  if (!AA) {
715  SDep Dep(Load, SDep::Barrier);
716  Dep.setLatency(1);
717  SU.addPred(Dep);
718  continue;
719  }
720  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
721  MachineMemOperand *MMO2 = *MI.memoperands_begin();
722  if (!MMO1->getValue() || !MMO2->getValue()) {
723  SDep Dep(Load, SDep::Barrier);
724  Dep.setLatency(1);
725  SU.addPred(Dep);
726  continue;
727  }
728  if (MMO1->getValue() == MMO2->getValue() &&
729  MMO1->getOffset() <= MMO2->getOffset()) {
730  SDep Dep(Load, SDep::Barrier);
731  Dep.setLatency(1);
732  SU.addPred(Dep);
733  continue;
734  }
735  AliasResult AAResult = AA->alias(
737  MMO1->getAAInfo()),
739  MMO2->getAAInfo()));
740 
741  if (AAResult != NoAlias) {
742  SDep Dep(Load, SDep::Barrier);
743  Dep.setLatency(1);
744  SU.addPred(Dep);
745  }
746  }
747  }
748  }
749  }
750 }
751 
752 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
753 /// processes dependences for PHIs. This function adds true dependences
754 /// from a PHI to a use, and a loop carried dependence from the use to the
755 /// PHI. The loop carried dependence is represented as an anti dependence
756 /// edge. This function also removes chain dependences between unrelated
757 /// PHIs.
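/// For example (illustrative): given the recurrence
///   %v    = PHI %init, <preheader>, %next, <loop>
///   %next = ADD %v, 1
/// the ADD gets a data edge from the PHI for its use of %v, and an anti edge
/// to the PHI models the loop carried use of %next on the next iteration.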
758 void SwingSchedulerDAG::updatePhiDependences() {
759  SmallVector<SDep, 4> RemoveDeps;
760  const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
761 
762  // Iterate over each DAG node.
763  for (SUnit &I : SUnits) {
764  RemoveDeps.clear();
765  // Set to true if the instruction has an operand defined by a Phi.
766  unsigned HasPhiUse = 0;
767  unsigned HasPhiDef = 0;
768  MachineInstr *MI = I.getInstr();
769  // Iterate over each operand, and we process the definitions.
770  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
771  MOE = MI->operands_end();
772  MOI != MOE; ++MOI) {
773  if (!MOI->isReg())
774  continue;
775  Register Reg = MOI->getReg();
776  if (MOI->isDef()) {
777  // If the register is used by a Phi, then create an anti dependence.
778  for (MachineRegisterInfo::use_instr_iterator
779  UI = MRI.use_instr_begin(Reg),
780  UE = MRI.use_instr_end();
781  UI != UE; ++UI) {
782  MachineInstr *UseMI = &*UI;
783  SUnit *SU = getSUnit(UseMI);
784  if (SU != nullptr && UseMI->isPHI()) {
785  if (!MI->isPHI()) {
786  SDep Dep(SU, SDep::Anti, Reg);
787  Dep.setLatency(1);
788  I.addPred(Dep);
789  } else {
790  HasPhiDef = Reg;
791  // Add a chain edge to a dependent Phi that isn't an existing
792  // predecessor.
793  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
794  I.addPred(SDep(SU, SDep::Barrier));
795  }
796  }
797  }
798  } else if (MOI->isUse()) {
799  // If the register is defined by a Phi, then create a true dependence.
800  MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
801  if (DefMI == nullptr)
802  continue;
803  SUnit *SU = getSUnit(DefMI);
804  if (SU != nullptr && DefMI->isPHI()) {
805  if (!MI->isPHI()) {
806  SDep Dep(SU, SDep::Data, Reg);
807  Dep.setLatency(0);
808  ST.adjustSchedDependency(SU, &I, Dep);
809  I.addPred(Dep);
810  } else {
811  HasPhiUse = Reg;
812  // Add a chain edge to a dependent Phi that isn't an existing
813  // predecessor.
814  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
815  I.addPred(SDep(SU, SDep::Barrier));
816  }
817  }
818  }
819  }
820  // Remove order dependences from an unrelated Phi.
821  if (!SwpPruneDeps)
822  continue;
823  for (auto &PI : I.Preds) {
824  MachineInstr *PMI = PI.getSUnit()->getInstr();
825  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
826  if (I.getInstr()->isPHI()) {
827  if (PMI->getOperand(0).getReg() == HasPhiUse)
828  continue;
829  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
830  continue;
831  }
832  RemoveDeps.push_back(PI);
833  }
834  }
835  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
836  I.removePred(RemoveDeps[i]);
837  }
838 }
839 
840 /// Iterate over each DAG node and see if we can change any dependences
841 /// in order to reduce the recurrence MII.
842 void SwingSchedulerDAG::changeDependences() {
843  // See if an instruction can use a value from the previous iteration.
844  // If so, we update the base and offset of the instruction and change
845  // the dependences.
846  for (SUnit &I : SUnits) {
847  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
848  int64_t NewOffset = 0;
849  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
850  NewOffset))
851  continue;
852 
853  // Get the MI and SUnit for the instruction that defines the original base.
854  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
855  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
856  if (!DefMI)
857  continue;
858  SUnit *DefSU = getSUnit(DefMI);
859  if (!DefSU)
860  continue;
861  // Get the MI and SUnit for the instruction that defines the new base.
862  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
863  if (!LastMI)
864  continue;
865  SUnit *LastSU = getSUnit(LastMI);
866  if (!LastSU)
867  continue;
868 
869  if (Topo.IsReachable(&I, LastSU))
870  continue;
871 
872  // Remove the dependence. The value now depends on a prior iteration.
873  SmallVector<SDep, 4> Deps;
874  for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
875  ++P)
876  if (P->getSUnit() == DefSU)
877  Deps.push_back(*P);
878  for (int i = 0, e = Deps.size(); i != e; i++) {
879  Topo.RemovePred(&I, Deps[i].getSUnit());
880  I.removePred(Deps[i]);
881  }
882  // Remove the chain dependence between the instructions.
883  Deps.clear();
884  for (auto &P : LastSU->Preds)
885  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
886  Deps.push_back(P);
887  for (int i = 0, e = Deps.size(); i != e; i++) {
888  Topo.RemovePred(LastSU, Deps[i].getSUnit());
889  LastSU->removePred(Deps[i]);
890  }
891 
892  // Add a dependence between the new instruction and the instruction
893  // that defines the new base.
894  SDep Dep(&I, SDep::Anti, NewBase);
895  Topo.AddPred(LastSU, &I);
896  LastSU->addPred(Dep);
897 
898  // Remember the base and offset information so that we can update the
899  // instruction during code generation.
900  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
901  }
902 }
903 
904 namespace {
905 
906 // FuncUnitSorter - Comparison operator used to sort instructions by
907 // the number of functional unit choices.
908 struct FuncUnitSorter {
909  const InstrItineraryData *InstrItins;
910  const MCSubtargetInfo *STI;
911  DenseMap<unsigned, unsigned> Resources;
912 
913  FuncUnitSorter(const TargetSubtargetInfo &TSI)
914  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
915 
916  // Compute the number of functional unit alternatives needed
917  // at each stage, and take the minimum value. We prioritize the
918  // instructions by the least number of choices first.
919  unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
920  unsigned SchedClass = Inst->getDesc().getSchedClass();
921  unsigned min = UINT_MAX;
922  if (InstrItins && !InstrItins->isEmpty()) {
923  for (const InstrStage &IS :
924  make_range(InstrItins->beginStage(SchedClass),
925  InstrItins->endStage(SchedClass))) {
926  unsigned funcUnits = IS.getUnits();
927  unsigned numAlternatives = countPopulation(funcUnits);
928  if (numAlternatives < min) {
929  min = numAlternatives;
930  F = funcUnits;
931  }
932  }
933  return min;
934  }
935  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
936  const MCSchedClassDesc *SCDesc =
937  STI->getSchedModel().getSchedClassDesc(SchedClass);
938  if (!SCDesc->isValid())
939  // No valid Schedule Class Desc for schedClass, should be
940  // Pseudo/PostRAPseudo
941  return min;
942 
943  for (const MCWriteProcResEntry &PRE :
944  make_range(STI->getWriteProcResBegin(SCDesc),
945  STI->getWriteProcResEnd(SCDesc))) {
946  if (!PRE.Cycles)
947  continue;
948  const MCProcResourceDesc *ProcResource =
949  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
950  unsigned NumUnits = ProcResource->NumUnits;
951  if (NumUnits < min) {
952  min = NumUnits;
953  F = PRE.ProcResourceIdx;
954  }
955  }
956  return min;
957  }
958  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
959  }
960 
961  // Compute the critical resources needed by the instruction. This
962  // function records the functional units needed by instructions that
963  // must use only one functional unit. We use this as a tie breaker
964  /// for computing the resource MII. The instructions that require
965  // the same, highly used, functional unit have high priority.
966  void calcCriticalResources(MachineInstr &MI) {
967  unsigned SchedClass = MI.getDesc().getSchedClass();
968  if (InstrItins && !InstrItins->isEmpty()) {
969  for (const InstrStage &IS :
970  make_range(InstrItins->beginStage(SchedClass),
971  InstrItins->endStage(SchedClass))) {
972  unsigned FuncUnits = IS.getUnits();
973  if (countPopulation(FuncUnits) == 1)
974  Resources[FuncUnits]++;
975  }
976  return;
977  }
978  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
979  const MCSchedClassDesc *SCDesc =
980  STI->getSchedModel().getSchedClassDesc(SchedClass);
981  if (!SCDesc->isValid())
982  // No valid Schedule Class Desc for schedClass, should be
983  // Pseudo/PostRAPseudo
984  return;
985 
986  for (const MCWriteProcResEntry &PRE :
987  make_range(STI->getWriteProcResBegin(SCDesc),
988  STI->getWriteProcResEnd(SCDesc))) {
989  if (!PRE.Cycles)
990  continue;
991  Resources[PRE.ProcResourceIdx]++;
992  }
993  return;
994  }
995  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
996  }
997 
998  /// Return true if IS1 has less priority than IS2.
999  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1000  unsigned F1 = 0, F2 = 0;
1001  unsigned MFUs1 = minFuncUnits(IS1, F1);
1002  unsigned MFUs2 = minFuncUnits(IS2, F2);
1003  if (MFUs1 == MFUs2)
1004  return Resources.lookup(F1) < Resources.lookup(F2);
1005  return MFUs1 > MFUs2;
1006  }
1007 };
1008 
1009 } // end anonymous namespace
1010 
1011 /// Calculate the resource constrained minimum initiation interval for the
1012 /// specified loop. We use the DFA to model the resources needed for
1013 /// each instruction, and we ignore dependences. A different DFA is created
1014 /// for each cycle that is required. When adding a new instruction, we attempt
1015 /// to add it to each existing DFA, until a legal space is found. If the
1016 /// instruction cannot be reserved in an existing DFA, we create a new one.
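/// For example (illustrative): if four instructions in the loop body can only
/// be issued on the same single functional unit, four DFAs (cycles) are needed
/// to reserve them all, so the returned ResMII is 4.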
1017 unsigned SwingSchedulerDAG::calculateResMII() {
1018 
1019  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1020  SmallVector<ResourceManager*, 8> Resources;
1021  MachineBasicBlock *MBB = Loop.getHeader();
1022  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1023 
1024  // Sort the instructions by the number of available choices for scheduling,
1025  // least to most. Use the number of critical resources as the tie breaker.
1026  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1027  for (MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr(),
1028  E = MBB->getFirstTerminator();
1029  I != E; ++I)
1030  FUS.calcCriticalResources(*I);
1031  PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
1032  FuncUnitOrder(FUS);
1033 
1034  for (MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr(),
1035  E = MBB->getFirstTerminator();
1036  I != E; ++I)
1037  FuncUnitOrder.push(&*I);
1038 
1039  while (!FuncUnitOrder.empty()) {
1040  MachineInstr *MI = FuncUnitOrder.top();
1041  FuncUnitOrder.pop();
1042  if (TII->isZeroCost(MI->getOpcode()))
1043  continue;
1044  // Attempt to reserve the instruction in an existing DFA. At least one
1045  // DFA is needed for each cycle.
1046  unsigned NumCycles = getSUnit(MI)->Latency;
1047  unsigned ReservedCycles = 0;
1048  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1049  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1050  LLVM_DEBUG({
1051  dbgs() << "Trying to reserve resource for " << NumCycles
1052  << " cycles for \n";
1053  MI->dump();
1054  });
1055  for (unsigned C = 0; C < NumCycles; ++C)
1056  while (RI != RE) {
1057  if ((*RI)->canReserveResources(*MI)) {
1058  (*RI)->reserveResources(*MI);
1059  ++ReservedCycles;
1060  break;
1061  }
1062  RI++;
1063  }
1064  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1065  << ", NumCycles:" << NumCycles << "\n");
1066  // Add new DFAs, if needed, to reserve resources.
1067  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1069  << "NewResource created to reserve resources"
1070  << "\n");
1071  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1072  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1073  NewResource->reserveResources(*MI);
1074  Resources.push_back(NewResource);
1075  }
1076  }
1077  int Resmii = Resources.size();
1078  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
1079  // Delete the memory for each of the DFAs that were created earlier.
1080  for (ResourceManager *RI : Resources) {
1081  ResourceManager *D = RI;
1082  delete D;
1083  }
1084  Resources.clear();
1085  return Resmii;
1086 }
1087 
1088 /// Calculate the recurrence-constrained minimum initiation interval.
1089 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1090 /// for each circuit. The II needs to satisfy the inequality
1091 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1092 /// II that satisfies the inequality, and the RecMII is the maximum
1093 /// of those values.
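/// For example (illustrative): a circuit whose edge latencies sum to a delay
/// of 7 with a distance of 1 requires II >= ceil(7 / 1) = 7, so that circuit
/// contributes 7 to the RecMII.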
1094 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1095  unsigned RecMII = 0;
1096 
1097  for (NodeSet &Nodes : NodeSets) {
1098  if (Nodes.empty())
1099  continue;
1100 
1101  unsigned Delay = Nodes.getLatency();
1102  unsigned Distance = 1;
1103 
1104  // ii = ceil(delay / distance)
1105  unsigned CurMII = (Delay + Distance - 1) / Distance;
1106  Nodes.setRecMII(CurMII);
1107  if (CurMII > RecMII)
1108  RecMII = CurMII;
1109  }
1110 
1111  return RecMII;
1112 }
1113 
1114 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1115 /// but we do this to find the circuits, and then change them back.
1116 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1117  SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
1118  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1119  SUnit *SU = &SUnits[i];
1120  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1121  IP != EP; ++IP) {
1122  if (IP->getKind() != SDep::Anti)
1123  continue;
1124  DepsAdded.push_back(std::make_pair(SU, *IP));
1125  }
1126  }
1127  for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
1128  E = DepsAdded.end();
1129  I != E; ++I) {
1130  // Remove this anti dependency and add one in the reverse direction.
1131  SUnit *SU = I->first;
1132  SDep &D = I->second;
1133  SUnit *TargetSU = D.getSUnit();
1134  unsigned Reg = D.getReg();
1135  unsigned Lat = D.getLatency();
1136  SU->removePred(D);
1137  SDep Dep(SU, SDep::Anti, Reg);
1138  Dep.setLatency(Lat);
1139  TargetSU->addPred(Dep);
1140  }
1141 }
1142 
1143 /// Create the adjacency structure of the nodes in the graph.
1144 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1145  SwingSchedulerDAG *DAG) {
1146  BitVector Added(SUnits.size());
1147  DenseMap<int, int> OutputDeps;
1148  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1149  Added.reset();
1150  // Add any successor to the adjacency matrix and exclude duplicates.
1151  for (auto &SI : SUnits[i].Succs) {
1152  // Only create a back-edge on the first and last nodes of a dependence
1153  // chain. This records any chains and adds them later.
1154  if (SI.getKind() == SDep::Output) {
1155  int N = SI.getSUnit()->NodeNum;
1156  int BackEdge = i;
1157  auto Dep = OutputDeps.find(BackEdge);
1158  if (Dep != OutputDeps.end()) {
1159  BackEdge = Dep->second;
1160  OutputDeps.erase(Dep);
1161  }
1162  OutputDeps[N] = BackEdge;
1163  }
1164  // Do not process a boundary node, an artificial node.
1165  // A back-edge is processed only if it goes to a Phi.
1166  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1167  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1168  continue;
1169  int N = SI.getSUnit()->NodeNum;
1170  if (!Added.test(N)) {
1171  AdjK[i].push_back(N);
1172  Added.set(N);
1173  }
1174  }
1175  // A chain edge between a store and a load is treated as a back-edge in the
1176  // adjacency matrix.
1177  for (auto &PI : SUnits[i].Preds) {
1178  if (!SUnits[i].getInstr()->mayStore() ||
1179  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1180  continue;
1181  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1182  int N = PI.getSUnit()->NodeNum;
1183  if (!Added.test(N)) {
1184  AdjK[i].push_back(N);
1185  Added.set(N);
1186  }
1187  }
1188  }
1189  }
1190  // Add back-edges in the adjacency matrix for the output dependences.
1191  for (auto &OD : OutputDeps)
1192  if (!Added.test(OD.second)) {
1193  AdjK[OD.first].push_back(OD.second);
1194  Added.set(OD.second);
1195  }
1196 }
1197 
1198 /// Identify an elementary circuit in the dependence graph starting at the
1199 /// specified node.
1200 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1201  bool HasBackedge) {
1202  SUnit *SV = &SUnits[V];
1203  bool F = false;
1204  Stack.insert(SV);
1205  Blocked.set(V);
1206 
1207  for (auto W : AdjK[V]) {
1208  if (NumPaths > MaxPaths)
1209  break;
1210  if (W < S)
1211  continue;
1212  if (W == S) {
1213  if (!HasBackedge)
1214  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1215  F = true;
1216  ++NumPaths;
1217  break;
1218  } else if (!Blocked.test(W)) {
1219  if (circuit(W, S, NodeSets,
1220  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1221  F = true;
1222  }
1223  }
1224 
1225  if (F)
1226  unblock(V);
1227  else {
1228  for (auto W : AdjK[V]) {
1229  if (W < S)
1230  continue;
1231  if (B[W].count(SV) == 0)
1232  B[W].insert(SV);
1233  }
1234  }
1235  Stack.pop_back();
1236  return F;
1237 }
1238 
1239 /// Unblock a node in the circuit finding algorithm.
1240 void SwingSchedulerDAG::Circuits::unblock(int U) {
1241  Blocked.reset(U);
1242  SmallPtrSet<SUnit *, 4> &BU = B[U];
1243  while (!BU.empty()) {
1244  SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
1245  assert(SI != BU.end() && "Invalid B set.");
1246  SUnit *W = *SI;
1247  BU.erase(W);
1248  if (Blocked.test(W->NodeNum))
1249  unblock(W->NodeNum);
1250  }
1251 }
1252 
1253 /// Identify all the elementary circuits in the dependence graph using
1254 /// Johnson's circuit algorithm.
1255 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1256  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1257  // but we do this to find the circuits, and then change them back.
1258  swapAntiDependences(SUnits);
1259 
1260  Circuits Cir(SUnits, Topo);
1261  // Create the adjacency structure.
1262  Cir.createAdjacencyStructure(this);
1263  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1264  Cir.reset();
1265  Cir.circuit(i, i, NodeSets);
1266  }
1267 
1268  // Change the dependences back so that we've created a DAG again.
1269  swapAntiDependences(SUnits);
1270 }
1271 
1272 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1273 // is loop-carried to the USE in the next iteration. This helps the pipeliner avoid
1274 // additional copies that would otherwise be needed across iterations. An artificial dependence
1275 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1276 
1277 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1278 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1279 // PHI-------True-Dep------> USEOfPhi
1280 
1281 // The mutation creates
1282 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1283 
1284 // Overall this ensures that the USEOfPHI is scheduled before SRCOfCopy
1285 // (since USE is a predecessor), which implies that the COPY/REG_SEQUENCE is
1286 // scheduled late enough to avoid additional copies across iterations. The possible scheduling
1287 // order would be
1288 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1289 
1291  for (SUnit &SU : DAG->SUnits) {
1292  // Find the COPY/REG_SEQUENCE instruction.
1293  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1294  continue;
1295 
1296  // Record the loop carried PHIs.
1297  SmallVector<SUnit *, 4> PHISUs;
1298  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1299  SmallVector<SUnit *, 4> SrcSUs;
1300 
1301  for (auto &Dep : SU.Preds) {
1302  SUnit *TmpSU = Dep.getSUnit();
1303  MachineInstr *TmpMI = TmpSU->getInstr();
1304  SDep::Kind DepKind = Dep.getKind();
1305  // Save the loop carried PHI.
1306  if (DepKind == SDep::Anti && TmpMI->isPHI())
1307  PHISUs.push_back(TmpSU);
1308  // Save the source of COPY/REG_SEQUENCE.
1309  // If the source has no predecessors, we will end up creating cycles.
1310  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1311  SrcSUs.push_back(TmpSU);
1312  }
1313 
1314  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1315  continue;
1316 
1317  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1318  // SUnit to the container.
1319  SmallVector<SUnit *, 8> UseSUs;
1320  for (auto I = PHISUs.begin(); I != PHISUs.end(); ++I) {
1321  for (auto &Dep : (*I)->Succs) {
1322  if (Dep.getKind() != SDep::Data)
1323  continue;
1324 
1325  SUnit *TmpSU = Dep.getSUnit();
1326  MachineInstr *TmpMI = TmpSU->getInstr();
1327  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1328  PHISUs.push_back(TmpSU);
1329  continue;
1330  }
1331  UseSUs.push_back(TmpSU);
1332  }
1333  }
1334 
1335  if (UseSUs.size() == 0)
1336  continue;
1337 
1338  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1339  // Add the artificial dependencies if it does not form a cycle.
1340  for (auto I : UseSUs) {
1341  for (auto Src : SrcSUs) {
1342  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1343  Src->addPred(SDep(I, SDep::Artificial));
1344  SDAG->Topo.AddPred(Src, I);
1345  }
1346  }
1347  }
1348  }
1349 }
1350 
1351 /// Return true for DAG nodes that we ignore when computing the cost functions.
1352 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1353 /// in the calculation of the ASAP, ALAP, etc functions.
1354 static bool ignoreDependence(const SDep &D, bool isPred) {
1355  if (D.isArtificial())
1356  return true;
1357  return D.getKind() == SDep::Anti && isPred;
1358 }
1359 
1360 /// Compute several functions needed to order the nodes for scheduling.
1361 /// ASAP - Earliest time to schedule a node.
1362 /// ALAP - Latest time to schedule a node.
1363 /// MOV - Mobility function, difference between ALAP and ASAP.
1364 /// D - Depth of each node.
1365 /// H - Height of each node.
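/// For example (illustrative): for a two-node chain A -> B with latency 1 and
/// no other edges, ASAP(A) = 0 and ASAP(B) = 1; with maxASAP = 1, ALAP(B) = 1
/// and ALAP(A) = 0, so both nodes have MOV = ALAP - ASAP = 0 (no slack).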
1366 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1367  ScheduleInfo.resize(SUnits.size());
1368 
1369  LLVM_DEBUG({
1370  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1371  E = Topo.end();
1372  I != E; ++I) {
1373  const SUnit &SU = SUnits[*I];
1374  dumpNode(SU);
1375  }
1376  });
1377 
1378  int maxASAP = 0;
1379  // Compute ASAP and ZeroLatencyDepth.
1380  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
1381  E = Topo.end();
1382  I != E; ++I) {
1383  int asap = 0;
1384  int zeroLatencyDepth = 0;
1385  SUnit *SU = &SUnits[*I];
1386  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1387  EP = SU->Preds.end();
1388  IP != EP; ++IP) {
1389  SUnit *pred = IP->getSUnit();
1390  if (IP->getLatency() == 0)
1391  zeroLatencyDepth =
1392  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1393  if (ignoreDependence(*IP, true))
1394  continue;
1395  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1396  getDistance(pred, SU, *IP) * MII));
1397  }
1398  maxASAP = std::max(maxASAP, asap);
1399  ScheduleInfo[*I].ASAP = asap;
1400  ScheduleInfo[*I].ZeroLatencyDepth = zeroLatencyDepth;
1401  }
1402 
1403  // Compute ALAP, ZeroLatencyHeight, and MOV.
1404  for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
1405  E = Topo.rend();
1406  I != E; ++I) {
1407  int alap = maxASAP;
1408  int zeroLatencyHeight = 0;
1409  SUnit *SU = &SUnits[*I];
1410  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1411  ES = SU->Succs.end();
1412  IS != ES; ++IS) {
1413  SUnit *succ = IS->getSUnit();
1414  if (IS->getLatency() == 0)
1415  zeroLatencyHeight =
1416  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1417  if (ignoreDependence(*IS, true))
1418  continue;
1419  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1420  getDistance(SU, succ, *IS) * MII));
1421  }
1422 
1423  ScheduleInfo[*I].ALAP = alap;
1424  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1425  }
1426 
1427  // After computing the node functions, compute the summary for each node set.
1428  for (NodeSet &I : NodeSets)
1429  I.computeNodeSetInfo(this);
1430 
1431  LLVM_DEBUG({
1432  for (unsigned i = 0; i < SUnits.size(); i++) {
1433  dbgs() << "\tNode " << i << ":\n";
1434  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1435  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1436  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1437  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1438  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1439  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1440  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1441  }
1442  });
1443 }
1444 
1445 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1446 /// as the predecessors of the elements of NodeOrder that are not also in
1447 /// NodeOrder.
1448 static bool pred_L(SetVector<SUnit *> &NodeOrder,
1449  SmallSetVector<SUnit *, 8> &Preds,
1450  const NodeSet *S = nullptr) {
1451  Preds.clear();
1452  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1453  I != E; ++I) {
1454  for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
1455  PI != PE; ++PI) {
1456  if (S && S->count(PI->getSUnit()) == 0)
1457  continue;
1458  if (ignoreDependence(*PI, true))
1459  continue;
1460  if (NodeOrder.count(PI->getSUnit()) == 0)
1461  Preds.insert(PI->getSUnit());
1462  }
1463  // Back-edges are predecessors with an anti-dependence.
1464  for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
1465  ES = (*I)->Succs.end();
1466  IS != ES; ++IS) {
1467  if (IS->getKind() != SDep::Anti)
1468  continue;
1469  if (S && S->count(IS->getSUnit()) == 0)
1470  continue;
1471  if (NodeOrder.count(IS->getSUnit()) == 0)
1472  Preds.insert(IS->getSUnit());
1473  }
1474  }
1475  return !Preds.empty();
1476 }
1477 
1478 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1479 /// as the successors of the elements of NodeOrder that are not also in
1480 /// NodeOrder.
1481 static bool succ_L(SetVector<SUnit *> &NodeOrder,
1482  SmallSetVector<SUnit *, 8> &Succs,
1483  const NodeSet *S = nullptr) {
1484  Succs.clear();
1485  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1486  I != E; ++I) {
1487  for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
1488  SI != SE; ++SI) {
1489  if (S && S->count(SI->getSUnit()) == 0)
1490  continue;
1491  if (ignoreDependence(*SI, false))
1492  continue;
1493  if (NodeOrder.count(SI->getSUnit()) == 0)
1494  Succs.insert(SI->getSUnit());
1495  }
1496  for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
1497  PE = (*I)->Preds.end();
1498  PI != PE; ++PI) {
1499  if (PI->getKind() != SDep::Anti)
1500  continue;
1501  if (S && S->count(PI->getSUnit()) == 0)
1502  continue;
1503  if (NodeOrder.count(PI->getSUnit()) == 0)
1504  Succs.insert(PI->getSUnit());
1505  }
1506  }
1507  return !Succs.empty();
1508 }
1509 
1510 /// Return true if there is a path from the specified node to any of the nodes
1511 /// in DestNodes. Keep track and return the nodes in any path.
1512 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1513  SetVector<SUnit *> &DestNodes,
1514  SetVector<SUnit *> &Exclude,
1515  SmallPtrSet<SUnit *, 8> &Visited) {
1516  if (Cur->isBoundaryNode())
1517  return false;
1518  if (Exclude.count(Cur) != 0)
1519  return false;
1520  if (DestNodes.count(Cur) != 0)
1521  return true;
1522  if (!Visited.insert(Cur).second)
1523  return Path.count(Cur) != 0;
1524  bool FoundPath = false;
1525  for (auto &SI : Cur->Succs)
1526  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1527  for (auto &PI : Cur->Preds)
1528  if (PI.getKind() == SDep::Anti)
1529  FoundPath |=
1530  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1531  if (FoundPath)
1532  Path.insert(Cur);
1533  return FoundPath;
1534 }
1535 
1536 /// Return true if Set1 is a subset of Set2.
1537 template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
1538  for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
1539  if (Set2.count(*I) == 0)
1540  return false;
1541  return true;
1542 }
1543 
1544 /// Compute the live-out registers for the instructions in a node-set.
1545 /// The live-out registers are those that are defined in the node-set,
1546 /// but not used. Except for use operands of Phis.
1547 static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
1548  NodeSet &NS) {
1549  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1550  MachineRegisterInfo &MRI = MF.getRegInfo();
1551  SmallVector<RegisterMaskPair, 8> LiveOutRegs;
1552  SmallSet<unsigned, 4> Uses;
1553  for (SUnit *SU : NS) {
1554  const MachineInstr *MI = SU->getInstr();
1555  if (MI->isPHI())
1556  continue;
1557  for (const MachineOperand &MO : MI->operands())
1558  if (MO.isReg() && MO.isUse()) {
1559  Register Reg = MO.getReg();
1560  if (Register::isVirtualRegister(Reg))
1561  Uses.insert(Reg);
1562  else if (MRI.isAllocatable(Reg))
1563  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1564  Uses.insert(*Units);
1565  }
1566  }
1567  for (SUnit *SU : NS)
1568  for (const MachineOperand &MO : SU->getInstr()->operands())
1569  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1570  Register Reg = MO.getReg();
1571  if (Register::isVirtualRegister(Reg)) {
1572  if (!Uses.count(Reg))
1573  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1574  LaneBitmask::getNone()));
1575  } else if (MRI.isAllocatable(Reg)) {
1576  for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1577  if (!Uses.count(*Units))
1578  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1579  LaneBitmask::getNone()));
1580  }
1581  }
1582  RPTracker.addLiveRegs(LiveOutRegs);
1583 }
1584 
1585 /// A heuristic to filter nodes in recurrent node-sets if the register
1586 /// pressure of a set is too high.
1587 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1588  for (auto &NS : NodeSets) {
1589  // Skip small node-sets since they won't cause register pressure problems.
1590  if (NS.size() <= 2)
1591  continue;
1592  IntervalPressure RecRegPressure;
1593  RegPressureTracker RecRPTracker(RecRegPressure);
1594  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1595  computeLiveOuts(MF, RecRPTracker, NS);
1596  RecRPTracker.closeBottom();
1597 
1598  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1599  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1600  return A->NodeNum > B->NodeNum;
1601  });
1602 
1603  for (auto &SU : SUnits) {
1604  // Since we're computing the register pressure for a subset of the
1605  // instructions in a block, we need to set the tracker for each
1606  // instruction in the node-set. The tracker is set to the instruction
1607  // just after the one we're interested in.
1608  MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
1609  RecRPTracker.setPos(std::next(CurInstI));
1610 
1611  RegPressureDelta RPDelta;
1612  ArrayRef<PressureChange> CriticalPSets;
1613  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1614  CriticalPSets,
1615  RecRegPressure.MaxSetPressure);
1616  if (RPDelta.Excess.isValid()) {
1617  LLVM_DEBUG(
1618  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1619  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1620  << ":" << RPDelta.Excess.getUnitInc());
1621  NS.setExceedPressure(SU);
1622  break;
1623  }
1624  RecRPTracker.recede();
1625  }
1626  }
1627 }
1628 
1629 /// A heuristic to colocate node sets that have the same set of
1630 /// successors.
1631 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1632  unsigned Colocate = 0;
1633  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1634  NodeSet &N1 = NodeSets[i];
1635  SmallSetVector<SUnit *, 8> S1;
1636  if (N1.empty() || !succ_L(N1, S1))
1637  continue;
1638  for (int j = i + 1; j < e; ++j) {
1639  NodeSet &N2 = NodeSets[j];
1640  if (N1.compareRecMII(N2) != 0)
1641  continue;
1642  SmallSetVector<SUnit *, 8> S2;
1643  if (N2.empty() || !succ_L(N2, S2))
1644  continue;
1645  if (isSubset(S1, S2) && S1.size() == S2.size()) {
1646  N1.setColocate(++Colocate);
1647  N2.setColocate(Colocate);
1648  break;
1649  }
1650  }
1651  }
1652 }
1653 
1654 /// Check if the existing node-sets are profitable. If not, then ignore the
1655 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1656 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1657 /// then it's best to try to schedule all instructions together instead of
1658 /// starting with the recurrent node-sets.
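/// For example (illustrative): if the resource MII is 20 but the only
/// recurrence is a two-node add chain (RecMII = 2, small depth), seeding the
/// node order with that tiny recurrence gains nothing, so the recurrent
/// node-sets are cleared and all nodes are scheduled together.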
1659 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1660  // Look for loops with a large MII.
1661  if (MII < 17)
1662  return;
1663  // Check if the node-set contains only a simple add recurrence.
1664  for (auto &NS : NodeSets) {
1665  if (NS.getRecMII() > 2)
1666  return;
1667  if (NS.getMaxDepth() > MII)
1668  return;
1669  }
1670  NodeSets.clear();
1671  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1672  return;
1673 }
1674 
1675 /// Add the nodes that do not belong to a recurrence set into groups
1676 /// based upon connected components.
1677 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1678  SetVector<SUnit *> NodesAdded;
1679  SmallPtrSet<SUnit *, 8> Visited;
1680  // Add the nodes that are on a path between the previous node sets and
1681  // the current node set.
1682  for (NodeSet &I : NodeSets) {
1683  SmallSetVector<SUnit *, 8> N;
1684  // Add the nodes from the current node set to the previous node set.
1685  if (succ_L(I, N)) {
1686  SetVector<SUnit *> Path;
1687  for (SUnit *NI : N) {
1688  Visited.clear();
1689  computePath(NI, Path, NodesAdded, I, Visited);
1690  }
1691  if (!Path.empty())
1692  I.insert(Path.begin(), Path.end());
1693  }
1694  // Add the nodes from the previous node set to the current node set.
1695  N.clear();
1696  if (succ_L(NodesAdded, N)) {
1697  SetVector<SUnit *> Path;
1698  for (SUnit *NI : N) {
1699  Visited.clear();
1700  computePath(NI, Path, I, NodesAdded, Visited);
1701  }
1702  if (!Path.empty())
1703  I.insert(Path.begin(), Path.end());
1704  }
1705  NodesAdded.insert(I.begin(), I.end());
1706  }
1707 
1708  // Create a new node set with the connected nodes of any successor of a node
1709  // in a recurrent set.
1710  NodeSet NewSet;
1711  SmallSetVector<SUnit *, 8> N;
1712  if (succ_L(NodesAdded, N))
1713  for (SUnit *I : N)
1714  addConnectedNodes(I, NewSet, NodesAdded);
1715  if (!NewSet.empty())
1716  NodeSets.push_back(NewSet);
1717 
1718  // Create a new node set with the connected nodes of any predecessor of a node
1719  // in a recurrent set.
1720  NewSet.clear();
1721  if (pred_L(NodesAdded, N))
1722  for (SUnit *I : N)
1723  addConnectedNodes(I, NewSet, NodesAdded);
1724  if (!NewSet.empty())
1725  NodeSets.push_back(NewSet);
1726 
1727  // Create new node sets with the connected nodes of any remaining node that
1728  // has no predecessor.
1729  for (unsigned i = 0; i < SUnits.size(); ++i) {
1730  SUnit *SU = &SUnits[i];
1731  if (NodesAdded.count(SU) == 0) {
1732  NewSet.clear();
1733  addConnectedNodes(SU, NewSet, NodesAdded);
1734  if (!NewSet.empty())
1735  NodeSets.push_back(NewSet);
1736  }
1737  }
1738 }
1739 
1740 /// Add the node to the set, and add all of its connected nodes to the set.
1741 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1742  SetVector<SUnit *> &NodesAdded) {
1743  NewSet.insert(SU);
1744  NodesAdded.insert(SU);
1745  for (auto &SI : SU->Succs) {
1746  SUnit *Successor = SI.getSUnit();
1747  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1748  addConnectedNodes(Successor, NewSet, NodesAdded);
1749  }
1750  for (auto &PI : SU->Preds) {
1751  SUnit *Predecessor = PI.getSUnit();
1752  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1753  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1754  }
1755 }
1756 
1757 /// Return true if Set1 contains elements in Set2. The elements in common
1758 /// are returned in a different container.
1759 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1760  SmallSetVector<SUnit *, 8> &Result) {
1761  Result.clear();
1762  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1763  SUnit *SU = Set1[i];
1764  if (Set2.count(SU) != 0)
1765  Result.insert(SU);
1766  }
1767  return !Result.empty();
1768 }
1769 
1770 /// Merge the recurrence node sets that have the same initial node.
1771 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1772  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1773  ++I) {
1774  NodeSet &NI = *I;
1775  for (NodeSetType::iterator J = I + 1; J != E;) {
1776  NodeSet &NJ = *J;
1777  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1778  if (NJ.compareRecMII(NI) > 0)
1779  NI.setRecMII(NJ.getRecMII());
1780  for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
1781  ++NII)
1782  I->insert(*NII);
1783  NodeSets.erase(J);
1784  E = NodeSets.end();
1785  } else {
1786  ++J;
1787  }
1788  }
1789  }
1790 }
1791 
1792 /// Remove nodes that have been scheduled in previous NodeSets.
1793 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1794  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1795  ++I)
1796  for (NodeSetType::iterator J = I + 1; J != E;) {
1797  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1798 
1799  if (J->empty()) {
1800  NodeSets.erase(J);
1801  E = NodeSets.end();
1802  } else {
1803  ++J;
1804  }
1805  }
1806 }
1807 
1808 /// Compute an ordered list of the dependence graph nodes, which
1809 /// indicates the order that the nodes will be scheduled. This is a
1810 /// two-level algorithm. First, a partial order is created, which
1811 /// consists of a list of sets ordered from highest to lowest priority.
1812 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1813  SmallSetVector<SUnit *, 8> R;
1814  NodeOrder.clear();
1815 
1816  for (auto &Nodes : NodeSets) {
1817  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1818  OrderKind Order;
1819  SmallSetVector<SUnit *, 8> N;
1820  if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
1821  R.insert(N.begin(), N.end());
1822  Order = BottomUp;
1823  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1824  } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
1825  R.insert(N.begin(), N.end());
1826  Order = TopDown;
1827  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1828  } else if (isIntersect(N, Nodes, R)) {
1829  // If some of the successors are in the existing node-set, then use the
1830  // top-down ordering.
1831  Order = TopDown;
1832  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1833  } else if (NodeSets.size() == 1) {
1834  for (auto &N : Nodes)
1835  if (N->Succs.size() == 0)
1836  R.insert(N);
1837  Order = BottomUp;
1838  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1839  } else {
1840  // Find the node with the highest ASAP.
1841  SUnit *maxASAP = nullptr;
1842  for (SUnit *SU : Nodes) {
1843  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1844  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1845  maxASAP = SU;
1846  }
1847  R.insert(maxASAP);
1848  Order = BottomUp;
1849  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1850  }
1851 
1852  while (!R.empty()) {
1853  if (Order == TopDown) {
1854  // Choose the node with the maximum height. If more than one, choose
1855  // the node with the maximum ZeroLatencyHeight. If still more than one,
1856  // choose the node with the lowest MOV.
1857  while (!R.empty()) {
1858  SUnit *maxHeight = nullptr;
1859  for (SUnit *I : R) {
1860  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1861  maxHeight = I;
1862  else if (getHeight(I) == getHeight(maxHeight) &&
1863  getZeroLatencyHeight(I) > getZeroLatencyHeight(maxHeight))
1864  maxHeight = I;
1865  else if (getHeight(I) == getHeight(maxHeight) &&
1866  getZeroLatencyHeight(I) ==
1867  getZeroLatencyHeight(maxHeight) &&
1868  getMOV(I) < getMOV(maxHeight))
1869  maxHeight = I;
1870  }
1871  NodeOrder.insert(maxHeight);
1872  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1873  R.remove(maxHeight);
1874  for (const auto &I : maxHeight->Succs) {
1875  if (Nodes.count(I.getSUnit()) == 0)
1876  continue;
1877  if (NodeOrder.count(I.getSUnit()) != 0)
1878  continue;
1879  if (ignoreDependence(I, false))
1880  continue;
1881  R.insert(I.getSUnit());
1882  }
1883  // Back-edges are predecessors with an anti-dependence.
1884  for (const auto &I : maxHeight->Preds) {
1885  if (I.getKind() != SDep::Anti)
1886  continue;
1887  if (Nodes.count(I.getSUnit()) == 0)
1888  continue;
1889  if (NodeOrder.count(I.getSUnit()) != 0)
1890  continue;
1891  R.insert(I.getSUnit());
1892  }
1893  }
1894  Order = BottomUp;
1895  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1896  SmallSetVector<SUnit *, 8> N;
1897  if (pred_L(NodeOrder, N, &Nodes))
1898  R.insert(N.begin(), N.end());
1899  } else {
1900  // Choose the node with the maximum depth. If more than one, choose
1901  // the node with the maximum ZeroLatencyDepth. If still more than one,
1902  // choose the node with the lowest MOV.
1903  while (!R.empty()) {
1904  SUnit *maxDepth = nullptr;
1905  for (SUnit *I : R) {
1906  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1907  maxDepth = I;
1908  else if (getDepth(I) == getDepth(maxDepth) &&
1909  getZeroLatencyDepth(I) > getZeroLatencyDepth(maxDepth))
1910  maxDepth = I;
1911  else if (getDepth(I) == getDepth(maxDepth) &&
1912  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1913  getMOV(I) < getMOV(maxDepth))
1914  maxDepth = I;
1915  }
1916  NodeOrder.insert(maxDepth);
1917  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1918  R.remove(maxDepth);
1919  if (Nodes.isExceedSU(maxDepth)) {
1920  Order = TopDown;
1921  R.clear();
1922  R.insert(Nodes.getNode(0));
1923  break;
1924  }
1925  for (const auto &I : maxDepth->Preds) {
1926  if (Nodes.count(I.getSUnit()) == 0)
1927  continue;
1928  if (NodeOrder.count(I.getSUnit()) != 0)
1929  continue;
1930  R.insert(I.getSUnit());
1931  }
1932  // Back-edges are predecessors with an anti-dependence.
1933  for (const auto &I : maxDepth->Succs) {
1934  if (I.getKind() != SDep::Anti)
1935  continue;
1936  if (Nodes.count(I.getSUnit()) == 0)
1937  continue;
1938  if (NodeOrder.count(I.getSUnit()) != 0)
1939  continue;
1940  R.insert(I.getSUnit());
1941  }
1942  }
1943  Order = TopDown;
1944  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1945  SmallSetVector<SUnit *, 8> N;
1946  if (succ_L(NodeOrder, N, &Nodes))
1947  R.insert(N.begin(), N.end());
1948  }
1949  }
1950  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1951  }
1952 
1953  LLVM_DEBUG({
1954  dbgs() << "Node order: ";
1955  for (SUnit *I : NodeOrder)
1956  dbgs() << " " << I->NodeNum << " ";
1957  dbgs() << "\n";
1958  });
1959 }
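// A small standalone sketch (not part of this file) of the top-down
// tie-breaking used in computeNodeOrder: prefer greater height, then greater
// zero-latency height, then lower MOV. NodeMetrics and the sketch namespace
// are illustrative stand-ins for the per-SUnit queries above.
namespace sketch {
struct NodeMetrics { int Height; int ZeroLatencyHeight; int MOV; };
// Returns true if A should be picked before B when scheduling top-down.
bool topDownBetter(const NodeMetrics &A, const NodeMetrics &B) {
  if (A.Height != B.Height)
    return A.Height > B.Height;
  if (A.ZeroLatencyHeight != B.ZeroLatencyHeight)
    return A.ZeroLatencyHeight > B.ZeroLatencyHeight;
  return A.MOV < B.MOV;
}
// The bottom-up selection mirrors this with depth and zero-latency depth.
} // namespace sketch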
1960 
1961 /// Process the nodes in the computed order and create the pipelined schedule
1962 /// of the instructions, if possible. Return true if a schedule is found.
1963 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1964 
1965  if (NodeOrder.empty()){
1966  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1967  return false;
1968  }
1969 
1970  bool scheduleFound = false;
1971  unsigned II = 0;
1972  // Keep increasing II until a valid schedule is found.
1973  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
1974  Schedule.reset();
1975  Schedule.setInitiationInterval(II);
1976  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1977 
1978  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
1979  SetVector<SUnit *>::iterator NE = NodeOrder.end();
1980  do {
1981  SUnit *SU = *NI;
1982 
1983  // Compute the schedule time for the instruction, which is based
1984  // upon the scheduled time for any predecessors/successors.
1985  int EarlyStart = INT_MIN;
1986  int LateStart = INT_MAX;
1987  // These values are set when the size of the schedule window is limited
1988  // due to chain dependences.
1989  int SchedEnd = INT_MAX;
1990  int SchedStart = INT_MIN;
1991  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1992  II, this);
1993  LLVM_DEBUG({
1994  dbgs() << "\n";
1995  dbgs() << "Inst (" << SU->NodeNum << ") ";
1996  SU->getInstr()->dump();
1997  dbgs() << "\n";
1998  });
1999  LLVM_DEBUG({
2000  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2001  LateStart, SchedEnd, SchedStart);
2002  });
2003 
2004  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2005  SchedStart > LateStart)
2006  scheduleFound = false;
2007  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2008  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2009  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2010  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2011  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2012  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2013  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2014  SchedEnd =
2015  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2016  // When scheduling a Phi it is better to start at the late cycle and go
2017  // backwards. The default order may insert the Phi too far away from
2018  // its first dependence.
2019  if (SU->getInstr()->isPHI())
2020  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2021  else
2022  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2023  } else {
2024  int FirstCycle = Schedule.getFirstCycle();
2025  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2026  FirstCycle + getASAP(SU) + II - 1, II);
2027  }
2028  // Even if we find a schedule, make sure the schedule doesn't exceed the
2029  // allowable number of stages. We keep trying if this happens.
2030  if (scheduleFound)
2031  if (SwpMaxStages > -1 &&
2032  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2033  scheduleFound = false;
2034 
2035  LLVM_DEBUG({
2036  if (!scheduleFound)
2037  dbgs() << "\tCan't schedule\n";
2038  });
2039  } while (++NI != NE && scheduleFound);
2040 
2041  // If a schedule is found, check if it is a valid schedule too.
2042  if (scheduleFound)
2043  scheduleFound = Schedule.isValidSchedule(this);
2044  }
2045 
2046  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
2047  << ")\n");
2048 
2049  if (scheduleFound)
2050  Schedule.finalizeSchedule(this);
2051  else
2052  Schedule.reset();
2053 
2054  return scheduleFound && Schedule.getMaxStageCount() > 0;
2055 }
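// A minimal sketch (not part of this file) of the outer search in
// schedulePipeline: try successive initiation intervals starting at the MII
// and return the first one for which every node fits in its window.
// TryPlaceAll stands in for the per-node placement loop above and is an
// assumed callback, not an API of this pass.
#include <functional>
namespace sketch {
int findInitiationInterval(int MII, int MaxII,
                           const std::function<bool(int)> &TryPlaceAll) {
  for (int II = MII; II <= MaxII; ++II)
    if (TryPlaceAll(II))  // One full placement attempt at this II.
      return II;          // Smallest II that admits a schedule.
  return -1;              // No schedule found up to MaxII.
}
} // namespace sketch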
2056 
2057 /// Return true if we can compute the amount the instruction changes
2058 /// during each iteration. Set Delta to the amount of the change.
2059 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2061  const MachineOperand *BaseOp;
2062  int64_t Offset;
2063  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
2064  return false;
2065 
2066  if (!BaseOp->isReg())
2067  return false;
2068 
2069  Register BaseReg = BaseOp->getReg();
2070 
2072  // Check if there is a Phi. If so, get the definition in the loop.
2073  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2074  if (BaseDef && BaseDef->isPHI()) {
2075  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2076  BaseDef = MRI.getVRegDef(BaseReg);
2077  }
2078  if (!BaseDef)
2079  return false;
2080 
2081  int D = 0;
2082  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2083  return false;
2084 
2085  Delta = D;
2086  return true;
2087 }
2088 
2089 /// Check if we can change the instruction to use an offset value from the
2090 /// previous iteration. If so, return true and set the base and offset values
2091 /// so that we can rewrite the load, if necessary.
2092 /// v1 = Phi(v0, v3)
2093 /// v2 = load v1, 0
2094 /// v3 = post_store v1, 4, x
2095 /// This function enables the load to be rewritten as v2 = load v3, 4.
2096 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2097  unsigned &BasePos,
2098  unsigned &OffsetPos,
2099  unsigned &NewBase,
2100  int64_t &Offset) {
2101  // Get the load instruction.
2102  if (TII->isPostIncrement(*MI))
2103  return false;
2104  unsigned BasePosLd, OffsetPosLd;
2105  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2106  return false;
2107  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2108 
2109  // Look for the Phi instruction.
2111  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2112  if (!Phi || !Phi->isPHI())
2113  return false;
2114  // Get the register defined in the loop block.
2115  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2116  if (!PrevReg)
2117  return false;
2118 
2119  // Check for the post-increment load/store instruction.
2120  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2121  if (!PrevDef || PrevDef == MI)
2122  return false;
2123 
2124  if (!TII->isPostIncrement(*PrevDef))
2125  return false;
2126 
2127  unsigned BasePos1 = 0, OffsetPos1 = 0;
2128  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2129  return false;
2130 
2131  // Make sure that the instructions do not access the same memory location in
2132  // the next iteration.
2133  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2134  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2135  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2136  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2137  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2138  MF.DeleteMachineInstr(NewMI);
2139  if (!Disjoint)
2140  return false;
2141 
2142  // Set the return value once we determine that we return true.
2143  BasePos = BasePosLd;
2144  OffsetPos = OffsetPosLd;
2145  NewBase = PrevReg;
2146  Offset = StoreOffset;
2147  return true;
2148 }
2149 
2150 /// Apply changes to the instruction if needed. The changes are needed
2151 /// to improve the scheduling and depend upon the final schedule.
2152 void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
2153  SMSchedule &Schedule) {
2154  SUnit *SU = getSUnit(MI);
2155  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
2156  InstrChanges.find(SU);
2157  if (It != InstrChanges.end()) {
2158  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2159  unsigned BasePos, OffsetPos;
2160  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2161  return;
2162  Register BaseReg = MI->getOperand(BasePos).getReg();
2163  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2164  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2165  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2166  int BaseStageNum = Schedule.stageScheduled(SU);
2167  int BaseCycleNum = Schedule.cycleScheduled(SU);
2168  if (BaseStageNum < DefStageNum) {
2169  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2170  int OffsetDiff = DefStageNum - BaseStageNum;
2171  if (DefCycleNum < BaseCycleNum) {
2172  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2173  if (OffsetDiff > 0)
2174  --OffsetDiff;
2175  }
2176  int64_t NewOffset =
2177  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2178  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2179  SU->setInstr(NewMI);
2180  MISUnitMap[NewMI] = SU;
2181  NewMIs[MI] = NewMI;
2182  }
2183  }
2184 }
2185 
2186 /// Return the instruction in the loop that defines the register.
2187 /// If the definition is a Phi, then follow the Phi operand to
2188 /// the instruction in the loop.
2189 MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
2190  SmallPtrSet<MachineInstr *, 8> Visited;
2191  MachineInstr *Def = MRI.getVRegDef(Reg);
2192  while (Def->isPHI()) {
2193  if (!Visited.insert(Def).second)
2194  break;
2195  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2196  if (Def->getOperand(i + 1).getMBB() == BB) {
2197  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2198  break;
2199  }
2200  }
2201  return Def;
2202 }
2203 
2204 /// Return true for an order or output dependence that is potentially loop
2205 /// carried. A dependence is loop carried if the destination defines a value
2206 /// that may be used or defined by the source in a subsequent iteration.
2207 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
2208  bool isSucc) {
2209  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2210  Dep.isArtificial())
2211  return false;
2212 
2213  if (!SwpPruneLoopCarried)
2214  return true;
2215 
2216  if (Dep.getKind() == SDep::Output)
2217  return true;
2218 
2219  MachineInstr *SI = Source->getInstr();
2220  MachineInstr *DI = Dep.getSUnit()->getInstr();
2221  if (!isSucc)
2222  std::swap(SI, DI);
2223  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2224 
2225  // Assume ordered loads and stores may have a loop carried dependence.
2226  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2227  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2228  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2229  return true;
2230 
2231  // Only chain dependences between a load and store can be loop carried.
2232  if (!DI->mayStore() || !SI->mayLoad())
2233  return false;
2234 
2235  unsigned DeltaS, DeltaD;
2236  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2237  return true;
2238 
2239  const MachineOperand *BaseOpS, *BaseOpD;
2240  int64_t OffsetS, OffsetD;
2242  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
2243  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, TRI))
2244  return true;
2245 
2246  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2247  return true;
2248 
2249  // Check that the base register is incremented by a constant value for each
2250  // iteration.
2251  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2252  if (!Def || !Def->isPHI())
2253  return true;
2254  unsigned InitVal = 0;
2255  unsigned LoopVal = 0;
2256  getPhiRegs(*Def, BB, InitVal, LoopVal);
2257  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2258  int D = 0;
2259  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2260  return true;
2261 
2262  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2263  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2264 
2265  // This is the main test, which checks the offset values and the loop
2266  // increment value to determine if the accesses may be loop carried.
2267  if (AccessSizeS == MemoryLocation::UnknownSize ||
2268  AccessSizeD == MemoryLocation::UnknownSize)
2269  return true;
2270 
2271  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2272  return true;
2273 
2274  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2275 }
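// A standalone sketch (not part of this file) of the final offset test above,
// assuming both accesses have known sizes and the same constant per-iteration
// stride. The function and parameter names are illustrative only.
#include <cstdint>
namespace sketch {
bool mayBeLoopCarried(int64_t OffsetS, uint64_t SizeS, uint64_t DeltaS,
                      int64_t OffsetD, uint64_t SizeD, uint64_t DeltaD) {
  if (DeltaS != DeltaD || DeltaS < SizeS || DeltaD < SizeD)
    return true;  // Strides differ or are too small: assume loop carried.
  return OffsetS + (int64_t)SizeS < OffsetD + (int64_t)SizeD;
}
// Example: two 4-byte accesses with an 8-byte stride. A source at offset 0
// and a destination at offset 4 gives mayBeLoopCarried(0, 4, 8, 4, 4, 8) ==
// true, while swapping the two offsets gives false.
} // namespace sketch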
2276 
2277 void SwingSchedulerDAG::postprocessDAG() {
2278  for (auto &M : Mutations)
2279  M->apply(this);
2280 }
2281 
2282 /// Try to schedule the node at the specified StartCycle and continue
2283 /// until the node is scheduled or the EndCycle is reached. This function
2284 /// returns true if the node is scheduled. This routine may search either
2285 /// forward or backward for a place to insert the instruction based upon
2286 /// the relative values of StartCycle and EndCycle.
2287 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2288  bool forward = true;
2289  LLVM_DEBUG({
2290  dbgs() << "Trying to insert node between " << StartCycle << " and "
2291  << EndCycle << " II: " << II << "\n";
2292  });
2293  if (StartCycle > EndCycle)
2294  forward = false;
2295 
2296  // The terminating condition depends on the direction.
2297  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2298  for (int curCycle = StartCycle; curCycle != termCycle;
2299  forward ? ++curCycle : --curCycle) {
2300 
2301  // Add the already scheduled instructions at the specified cycle to the
2302  // DFA.
2303  ProcItinResources.clearResources();
2304  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2305  checkCycle <= LastCycle; checkCycle += II) {
2306  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2307 
2308  for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
2309  E = cycleInstrs.end();
2310  I != E; ++I) {
2311  if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
2312  continue;
2313  assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
2314  "These instructions have already been scheduled.");
2315  ProcItinResources.reserveResources(*(*I)->getInstr());
2316  }
2317  }
2318  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2319  ProcItinResources.canReserveResources(*SU->getInstr())) {
2320  LLVM_DEBUG({
2321  dbgs() << "\tinsert at cycle " << curCycle << " ";
2322  SU->getInstr()->dump();
2323  });
2324 
2325  ScheduledInstrs[curCycle].push_back(SU);
2326  InstrToCycle.insert(std::make_pair(SU, curCycle));
2327  if (curCycle > LastCycle)
2328  LastCycle = curCycle;
2329  if (curCycle < FirstCycle)
2330  FirstCycle = curCycle;
2331  return true;
2332  }
2333  LLVM_DEBUG({
2334  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2335  SU->getInstr()->dump();
2336  });
2337  }
2338  return false;
2339 }
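// A toy sketch (not part of this file) of the modulo property the resource
// check above relies on: in a kernel that repeats every II cycles, all cycles
// that are congruent modulo II share the same resources. This table models a
// single generic unit per cycle; the class name is illustrative only.
#include <vector>
namespace sketch {
class ModuloReservationTable {
  int II;
  std::vector<int> Count;  // Instructions already placed in each slot.
public:
  explicit ModuloReservationTable(int II) : II(II), Count(II, 0) {}
  int slot(int Cycle) const { return ((Cycle % II) + II) % II; }
  bool canReserve(int Cycle) const { return Count[slot(Cycle)] == 0; }
  void reserve(int Cycle) { ++Count[slot(Cycle)]; }
};
} // namespace sketch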
2340 
2341 // Return the cycle of the earliest scheduled instruction in the chain.
2342 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
2343  SmallPtrSet<SUnit *, 8> Visited;
2344  SmallVector<SDep, 8> Worklist;
2345  Worklist.push_back(Dep);
2346  int EarlyCycle = INT_MAX;
2347  while (!Worklist.empty()) {
2348  const SDep &Cur = Worklist.pop_back_val();
2349  SUnit *PrevSU = Cur.getSUnit();
2350  if (Visited.count(PrevSU))
2351  continue;
2352  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2353  if (it == InstrToCycle.end())
2354  continue;
2355  EarlyCycle = std::min(EarlyCycle, it->second);
2356  for (const auto &PI : PrevSU->Preds)
2357  if (PI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
2358  Worklist.push_back(PI);
2359  Visited.insert(PrevSU);
2360  }
2361  return EarlyCycle;
2362 }
2363 
2364 // Return the cycle of the latest scheduled instruction in the chain.
2365 int SMSchedule::latestCycleInChain(const SDep &Dep) {
2366  SmallPtrSet<SUnit *, 8> Visited;
2367  SmallVector<SDep, 8> Worklist;
2368  Worklist.push_back(Dep);
2369  int LateCycle = INT_MIN;
2370  while (!Worklist.empty()) {
2371  const SDep &Cur = Worklist.pop_back_val();
2372  SUnit *SuccSU = Cur.getSUnit();
2373  if (Visited.count(SuccSU))
2374  continue;
2375  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2376  if (it == InstrToCycle.end())
2377  continue;
2378  LateCycle = std::max(LateCycle, it->second);
2379  for (const auto &SI : SuccSU->Succs)
2380  if (SI.getKind() == SDep::Order || Dep.getKind() == SDep::Output)
2381  Worklist.push_back(SI);
2382  Visited.insert(SuccSU);
2383  }
2384  return LateCycle;
2385 }
2386 
2387 /// If an instruction has a use that spans multiple iterations, then
2388 /// return true. These instructions are characterized by having a back-edge
2389 /// to a Phi, which contains a reference to another Phi.
2390 static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
2391  for (auto &P : SU->Preds)
2392  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2393  for (auto &S : P.getSUnit()->Succs)
2394  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2395  return P.getSUnit();
2396  return nullptr;
2397 }
2398 
2399 /// Compute the scheduling start slot for the instruction. The start slot
2400 /// depends on any predecessor or successor nodes scheduled already.
2401 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2402  int *MinEnd, int *MaxStart, int II,
2403  SwingSchedulerDAG *DAG) {
2404  // Iterate over each instruction that has been scheduled already. The start
2405  // slot computation depends on whether the previously scheduled instruction
2406  // is a predecessor or successor of the specified instruction.
2407  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2408 
2409  // Iterate over each instruction in the current cycle.
2410  for (SUnit *I : getInstructions(cycle)) {
2411  // Because we're processing a DAG for the dependences, we recognize
2412  // the back-edge in recurrences by anti dependences.
2413  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2414  const SDep &Dep = SU->Preds[i];
2415  if (Dep.getSUnit() == I) {
2416  if (!DAG->isBackedge(SU, Dep)) {
2417  int EarlyStart = cycle + Dep.getLatency() -
2418  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2419  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2420  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2421  int End = earliestCycleInChain(Dep) + (II - 1);
2422  *MinEnd = std::min(*MinEnd, End);
2423  }
2424  } else {
2425  int LateStart = cycle - Dep.getLatency() +
2426  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2427  *MinLateStart = std::min(*MinLateStart, LateStart);
2428  }
2429  }
2430  // For an instruction that requires multiple iterations, make sure that
2431  // the dependent instruction is not scheduled past the definition.
2432  SUnit *BE = multipleIterations(I, DAG);
2433  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2434  !SU->isPred(I))
2435  *MinLateStart = std::min(*MinLateStart, cycle);
2436  }
2437  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2438  if (SU->Succs[i].getSUnit() == I) {
2439  const SDep &Dep = SU->Succs[i];
2440  if (!DAG->isBackedge(SU, Dep)) {
2441  int LateStart = cycle - Dep.getLatency() +
2442  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2443  *MinLateStart = std::min(*MinLateStart, LateStart);
2444  if (DAG->isLoopCarriedDep(SU, Dep)) {
2445  int Start = latestCycleInChain(Dep) + 1 - II;
2446  *MaxStart = std::max(*MaxStart, Start);
2447  }
2448  } else {
2449  int EarlyStart = cycle + Dep.getLatency() -
2450  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2451  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2452  }
2453  }
2454  }
2455  }
2456  }
2457 }
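// A minimal sketch (not part of this file) of the two bounds computed above
// for a neighbor already scheduled at cycle C, with edge latency L and
// dependence distance D, given initiation interval II. The helper names are
// illustrative only.
namespace sketch {
int earlyStartFromScheduledPred(int C, int L, int D, int II) {
  return C + L - D * II;  // The value is produced D iterations earlier.
}
int lateStartFromScheduledSucc(int C, int L, int D, int II) {
  return C - L + D * II;  // The consumer runs D iterations later.
}
// Example: a predecessor at cycle 3 with latency 2 and distance 0 forces
// EarlyStart >= 5; with distance 1 and II = 4 it only forces EarlyStart >= 1.
} // namespace sketch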
2458 
2459 /// Order the instructions within a cycle so that the definitions occur
2460 /// before the uses. Returns true if the instruction is added to the start
2461 /// of the list, or false if added to the end.
2462 void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
2463  std::deque<SUnit *> &Insts) {
2464  MachineInstr *MI = SU->getInstr();
2465  bool OrderBeforeUse = false;
2466  bool OrderAfterDef = false;
2467  bool OrderBeforeDef = false;
2468  unsigned MoveDef = 0;
2469  unsigned MoveUse = 0;
2470  int StageInst1 = stageScheduled(SU);
2471 
2472  unsigned Pos = 0;
2473  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2474  ++I, ++Pos) {
2475  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2476  MachineOperand &MO = MI->getOperand(i);
2477  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2478  continue;
2479 
2480  Register Reg = MO.getReg();
2481  unsigned BasePos, OffsetPos;
2482  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2483  if (MI->getOperand(BasePos).getReg() == Reg)
2484  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2485  Reg = NewReg;
2486  bool Reads, Writes;
2487  std::tie(Reads, Writes) =
2488  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2489  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2490  OrderBeforeUse = true;
2491  if (MoveUse == 0)
2492  MoveUse = Pos;
2493  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2494  // Add the instruction after the scheduled instruction.
2495  OrderAfterDef = true;
2496  MoveDef = Pos;
2497  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2498  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2499  OrderBeforeUse = true;
2500  if (MoveUse == 0)
2501  MoveUse = Pos;
2502  } else {
2503  OrderAfterDef = true;
2504  MoveDef = Pos;
2505  }
2506  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2507  OrderBeforeUse = true;
2508  if (MoveUse == 0)
2509  MoveUse = Pos;
2510  if (MoveUse != 0) {
2511  OrderAfterDef = true;
2512  MoveDef = Pos - 1;
2513  }
2514  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2515  // Add the instruction before the scheduled instruction.
2516  OrderBeforeUse = true;
2517  if (MoveUse == 0)
2518  MoveUse = Pos;
2519  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2520  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2521  if (MoveUse == 0) {
2522  OrderBeforeDef = true;
2523  MoveUse = Pos;
2524  }
2525  }
2526  }
2527  // Check for order dependences between instructions. Make sure the source
2528  // is ordered before the destination.
2529  for (auto &S : SU->Succs) {
2530  if (S.getSUnit() != *I)
2531  continue;
2532  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2533  OrderBeforeUse = true;
2534  if (Pos < MoveUse)
2535  MoveUse = Pos;
2536  }
2537  // We did not handle HW dependences in the previous for loop,
2538  // and we normally set Latency = 0 for Anti deps,
2539  // so there may be nodes in the same cycle that are Anti dependent on HW regs.
2540  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2541  OrderBeforeUse = true;
2542  if ((MoveUse == 0) || (Pos < MoveUse))
2543  MoveUse = Pos;
2544  }
2545  }
2546  for (auto &P : SU->Preds) {
2547  if (P.getSUnit() != *I)
2548  continue;
2549  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2550  OrderAfterDef = true;
2551  MoveDef = Pos;
2552  }
2553  }
2554  }
2555 
2556  // A circular dependence.
2557  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2558  OrderBeforeUse = false;
2559 
2560  // OrderAfterDef takes precedence over OrderBeforeDef. The latter is due
2561  // to a loop-carried dependence.
2562  if (OrderBeforeDef)
2563  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2564 
2565  // The uncommon case when the instruction order needs to be updated because
2566  // there is both a use and def.
2567  if (OrderBeforeUse && OrderAfterDef) {
2568  SUnit *UseSU = Insts.at(MoveUse);
2569  SUnit *DefSU = Insts.at(MoveDef);
2570  if (MoveUse > MoveDef) {
2571  Insts.erase(Insts.begin() + MoveUse);
2572  Insts.erase(Insts.begin() + MoveDef);
2573  } else {
2574  Insts.erase(Insts.begin() + MoveDef);
2575  Insts.erase(Insts.begin() + MoveUse);
2576  }
2577  orderDependence(SSD, UseSU, Insts);
2578  orderDependence(SSD, SU, Insts);
2579  orderDependence(SSD, DefSU, Insts);
2580  return;
2581  }
2582  // Put the new instruction first if there is a use in the list. Otherwise,
2583  // put it at the end of the list.
2584  if (OrderBeforeUse)
2585  Insts.push_front(SU);
2586  else
2587  Insts.push_back(SU);
2588 }
2589 
2590 /// Return true if the scheduled Phi has a loop carried operand.
2591 bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
2592  if (!Phi.isPHI())
2593  return false;
2594  assert(Phi.isPHI() && "Expecting a Phi.");
2595  SUnit *DefSU = SSD->getSUnit(&Phi);
2596  unsigned DefCycle = cycleScheduled(DefSU);
2597  int DefStage = stageScheduled(DefSU);
2598 
2599  unsigned InitVal = 0;
2600  unsigned LoopVal = 0;
2601  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2602  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2603  if (!UseSU)
2604  return true;
2605  if (UseSU->getInstr()->isPHI())
2606  return true;
2607  unsigned LoopCycle = cycleScheduled(UseSU);
2608  int LoopStage = stageScheduled(UseSU);
2609  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2610 }
2611 
2612 /// Return true if the instruction is a definition that is loop carried
2613 /// and defines the use on the next iteration.
2614 /// v1 = phi(v2, v3)
2615 /// (Def) v3 = op v1
2616 /// (MO) = v1
2617 /// If MO appears before Def, then v1 and v3 may get assigned to the same
2618 /// register.
2619 bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
2620  MachineInstr *Def, MachineOperand &MO) {
2621  if (!MO.isReg())
2622  return false;
2623  if (Def->isPHI())
2624  return false;
2625  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2626  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2627  return false;
2628  if (!isLoopCarried(SSD, *Phi))
2629  return false;
2630  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2631  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2632  MachineOperand &DMO = Def->getOperand(i);
2633  if (!DMO.isReg() || !DMO.isDef())
2634  continue;
2635  if (DMO.getReg() == LoopReg)
2636  return true;
2637  }
2638  return false;
2639 }
2640 
2641 // Check if the generated schedule is valid. This function checks if
2642 // an instruction that uses a physical register is scheduled in a
2643 // different stage than the definition. The pipeliner does not handle
2644 // physical register values that may cross a basic block boundary.
2645 bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
2646  for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
2647  SUnit &SU = SSD->SUnits[i];
2648  if (!SU.hasPhysRegDefs)
2649  continue;
2650  int StageDef = stageScheduled(&SU);
2651  assert(StageDef != -1 && "Instruction should have been scheduled.");
2652  for (auto &SI : SU.Succs)
2653  if (SI.isAssignedRegDep())
2654  if (Register::isPhysicalRegister(SI.getReg()))
2655  if (stageScheduled(SI.getSUnit()) != StageDef)
2656  return false;
2657  }
2658  return true;
2659 }
2660 
2661 /// A property of the node order in swing-modulo-scheduling is
2662 /// that for nodes outside circuits the following holds:
2663 /// none of them is scheduled after both a successor and a
2664 /// predecessor.
2665 /// The method below checks whether the property is met.
2666 /// If not, debug information is printed and statistics information updated.
2667 /// Note that we do not use an assert statement.
2668 /// The reason is that although an invalid node order may prevent
2669 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2670 /// it does not lead to the generation of incorrect code.
2671 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2672 
2673  // a sorted vector that maps each SUnit to its index in the NodeOrder
2674  typedef std::pair<SUnit *, unsigned> UnitIndex;
2675  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2676 
2677  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2678  Indices.push_back(std::make_pair(NodeOrder[i], i));
2679 
2680  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2681  return std::get<0>(i1) < std::get<0>(i2);
2682  };
2683 
2684  // sort, so that we can perform a binary search
2685  llvm::sort(Indices, CompareKey);
2686 
2687  bool Valid = true;
2688  (void)Valid;
2689  // for each SUnit in the NodeOrder, check whether
2690  // it appears after both a successor and a predecessor
2691  // of the SUnit. If this is the case, and the SUnit
2692  // is not part of a circuit, then the NodeOrder is not
2693  // valid.
2694  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2695  SUnit *SU = NodeOrder[i];
2696  unsigned Index = i;
2697 
2698  bool PredBefore = false;
2699  bool SuccBefore = false;
2700 
2701  SUnit *Succ;
2702  SUnit *Pred;
2703  (void)Succ;
2704  (void)Pred;
2705 
2706  for (SDep &PredEdge : SU->Preds) {
2707  SUnit *PredSU = PredEdge.getSUnit();
2708  unsigned PredIndex = std::get<1>(
2709  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2710  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2711  PredBefore = true;
2712  Pred = PredSU;
2713  break;
2714  }
2715  }
2716 
2717  for (SDep &SuccEdge : SU->Succs) {
2718  SUnit *SuccSU = SuccEdge.getSUnit();
2719  // Do not process a boundary node; it was not included in NodeOrder,
2720  // hence not in Indices either, and the call to std::lower_bound() below
2721  // would return Indices.end().
2722  if (SuccSU->isBoundaryNode())
2723  continue;
2724  unsigned SuccIndex = std::get<1>(
2725  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2726  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2727  SuccBefore = true;
2728  Succ = SuccSU;
2729  break;
2730  }
2731  }
2732 
2733  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2734  // instructions in circuits are allowed to be scheduled
2735  // after both a successor and predecessor.
2736  bool InCircuit = llvm::any_of(
2737  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2738  if (InCircuit)
2739  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2740  else {
2741  Valid = false;
2742  NumNodeOrderIssues++;
2743  LLVM_DEBUG(dbgs() << "Predecessor ";);
2744  }
2745  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2746  << " are scheduled before node " << SU->NodeNum
2747  << "\n";);
2748  }
2749  }
2750 
2751  LLVM_DEBUG({
2752  if (!Valid)
2753  dbgs() << "Invalid node order found!\n";
2754  });
2755 }
2756 
2757 /// Attempt to fix the degenerate cases when the instruction serialization
2758 /// causes the register lifetimes to overlap. For example,
2759 /// p' = store_pi(p, b)
2760 /// = load p, offset
2761 /// In this case p and p' overlap, which means that two registers are needed.
2762 /// Instead, this function changes the load to use p' and updates the offset.
2763 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2764  unsigned OverlapReg = 0;
2765  unsigned NewBaseReg = 0;
2766  for (SUnit *SU : Instrs) {
2767  MachineInstr *MI = SU->getInstr();
2768  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2769  const MachineOperand &MO = MI->getOperand(i);
2770  // Look for an instruction that uses p. The instruction occurs in the
2771  // same cycle but occurs later in the serialized order.
2772  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2773  // Check that the instruction appears in the InstrChanges structure,
2774  // which contains instructions that can have the offset updated.
2775  DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
2776  InstrChanges.find(SU);
2777  if (It != InstrChanges.end()) {
2778  unsigned BasePos, OffsetPos;
2779  // Update the base register and adjust the offset.
2780  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2781  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2782  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2783  int64_t NewOffset =
2784  MI->getOperand(OffsetPos).getImm() - It->second.second;
2785  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2786  SU->setInstr(NewMI);
2787  MISUnitMap[NewMI] = SU;
2788  NewMIs[MI] = NewMI;
2789  }
2790  }
2791  OverlapReg = 0;
2792  NewBaseReg = 0;
2793  break;
2794  }
2795  // Look for an instruction of the form p' = op(p), which uses and defines
2796  // two virtual registers that get allocated to the same physical register.
2797  unsigned TiedUseIdx = 0;
2798  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2799  // OverlapReg is p in the example above.
2800  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2801  // NewBaseReg is p' in the example above.
2802  NewBaseReg = MI->getOperand(i).getReg();
2803  break;
2804  }
2805  }
2806  }
2807 }
2808 
2809 /// After the schedule has been formed, call this function to combine
2810 /// the instructions from the different stages/cycles. That is, this
2811 /// function creates a schedule that represents a single iteration.
2812 void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
2813  // Move all instructions to the first stage from later stages.
2814  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2815  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2816  ++stage) {
2817  std::deque<SUnit *> &cycleInstrs =
2818  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2819  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
2820  E = cycleInstrs.rend();
2821  I != E; ++I)
2822  ScheduledInstrs[cycle].push_front(*I);
2823  }
2824  }
2825 
2826  // Erase all the elements in the later stages. Only one iteration should
2827  // remain in the scheduled list, and it contains all the instructions.
2828  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2829  ScheduledInstrs.erase(cycle);
2830 
2831  // Change the registers in instruction as specified in the InstrChanges
2832  // map. We need to use the new registers to create the correct order.
2833  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
2834  SUnit *SU = &SSD->SUnits[i];
2835  SSD->applyInstrChange(SU->getInstr(), *this);
2836  }
2837 
2838  // Reorder the instructions in each cycle to fix and improve the
2839  // generated code.
2840  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2841  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2842  std::deque<SUnit *> newOrderPhi;
2843  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
2844  SUnit *SU = cycleInstrs[i];
2845  if (SU->getInstr()->isPHI())
2846  newOrderPhi.push_back(SU);
2847  }
2848  std::deque<SUnit *> newOrderI;
2849  for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
2850  SUnit *SU = cycleInstrs[i];
2851  if (!SU->getInstr()->isPHI())
2852  orderDependence(SSD, SU, newOrderI);
2853  }
2854  // Replace the old order with the new order.
2855  cycleInstrs.swap(newOrderPhi);
2856  cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
2857  SSD->fixupRegisterOverlaps(cycleInstrs);
2858  }
2859 
2860  LLVM_DEBUG(dump(););
2861 }
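// A small sketch (not part of this file) of the relationship the folding
// loop above inverts: an instruction whose expanded slot is
// Cycle + Stage * II belongs to stage Stage and folds back into kernel cycle
// Cycle. FirstCycle is the first cycle of the kernel; the names are
// illustrative only.
#include <utility>
namespace sketch {
std::pair<int, int> kernelCycleAndStage(int ExpandedCycle, int FirstCycle,
                                        int II) {
  int Stage = (ExpandedCycle - FirstCycle) / II;
  int KernelCycle = ExpandedCycle - Stage * II;
  return {KernelCycle, Stage};
}
// Example: with FirstCycle = 0 and II = 3, expanded cycle 7 is stage 2 and
// folds into kernel cycle 1.
} // namespace sketch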
2862 
2863 void NodeSet::print(raw_ostream &os) const {
2864  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2865  << " depth " << MaxDepth << " col " << Colocate << "\n";
2866  for (const auto &I : Nodes)
2867  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2868  os << "\n";
2869 }
2870 
2871 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2872 /// Print the schedule information to the given output.
2873 void SMSchedule::print(raw_ostream &os) const {
2874  // Iterate over each cycle.
2875  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2876  // Iterate over each instruction in the cycle.
2877  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2878  for (SUnit *CI : cycleInstrs->second) {
2879  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2880  os << "(" << CI->NodeNum << ") ";
2881  CI->getInstr()->print(os);
2882  os << "\n";
2883  }
2884  }
2885 }
2886 
2887 /// Utility function used for debugging to print the schedule.
2889 LLVM_DUMP_METHOD void SMSchedule::dump() const { print(dbgs()); }
2890 
2891 #endif
2892 
2893 void ResourceManager::initProcResourceVectors(
2894  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2895  unsigned ProcResourceID = 0;
2896 
2897  // We currently limit the resource kinds to 64 and below so that we can use
2898  // uint64_t for Masks
2899  assert(SM.getNumProcResourceKinds() < 64 &&
2900  "Too many kinds of resources, unsupported");
2901  // Create a unique bitmask for every processor resource unit.
2902  // Skip resource at index 0, since it always references 'InvalidUnit'.
2903  Masks.resize(SM.getNumProcResourceKinds());
2904  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2905  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2906  if (Desc.SubUnitsIdxBegin)
2907  continue;
2908  Masks[I] = 1ULL << ProcResourceID;
2909  ProcResourceID++;
2910  }
2911  // Create a unique bitmask for every processor resource group.
2912  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2913  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2914  if (!Desc.SubUnitsIdxBegin)
2915  continue;
2916  Masks[I] = 1ULL << ProcResourceID;
2917  for (unsigned U = 0; U < Desc.NumUnits; ++U)
2918  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
2919  ProcResourceID++;
2920  }
2921  LLVM_DEBUG({
2922  if (SwpShowResMask) {
2923  dbgs() << "ProcResourceDesc:\n";
2924  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2925  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
2926  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
2927  ProcResource->Name, I, Masks[I],
2928  ProcResource->NumUnits);
2929  }
2930  dbgs() << " -----------------\n";
2931  }
2932  });
2933 }
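// An illustrative sketch (not part of this file) of the mask construction
// above for a toy machine with two units U0, U1 and one group G containing
// both: each unit gets its own bit, and the group gets a fresh bit plus the
// OR of its sub-units' bits.
#include <cstdint>
#include <vector>
namespace sketch {
std::vector<uint64_t> buildToyMasks() {
  std::vector<uint64_t> Masks(3, 0);
  unsigned NextBit = 0;
  Masks[0] = 1ULL << NextBit++;     // U0 -> 0b001
  Masks[1] = 1ULL << NextBit++;     // U1 -> 0b010
  Masks[2] = 1ULL << NextBit++;     // G gets its own bit (0b100)...
  Masks[2] |= Masks[0] | Masks[1];  // ...plus its sub-units: 0b111
  return Masks;
}
} // namespace sketch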
2934 
2935 bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
2936 
2937  LLVM_DEBUG({
2938  if (SwpDebugResource)
2939  dbgs() << "canReserveResources:\n";
2940  });
2941  if (UseDFA)
2942  return DFAResources->canReserveResources(MID);
2943 
2944  unsigned InsnClass = MID->getSchedClass();
2945  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
2946  if (!SCDesc->isValid()) {
2947  LLVM_DEBUG({
2948  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
2949  dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
2950  });
2951  return true;
2952  }
2953 
2954  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
2955  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
2956  for (; I != E; ++I) {
2957  if (!I->Cycles)
2958  continue;
2959  const MCProcResourceDesc *ProcResource =
2960  SM.getProcResource(I->ProcResourceIdx);
2961  unsigned NumUnits = ProcResource->NumUnits;
2962  LLVM_DEBUG({
2963  if (SwpDebugResource)
2964  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
2965  ProcResource->Name, I->ProcResourceIdx,
2966  ProcResourceCount[I->ProcResourceIdx], NumUnits,
2967  I->Cycles);
2968  });
2969  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
2970  return false;
2971  }
2972  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
2973  return true;
2974 }
2975 
2976 void ResourceManager::reserveResources(const MCInstrDesc *MID) {
2977  LLVM_DEBUG({
2978  if (SwpDebugResource)
2979  dbgs() << "reserveResources:\n";
2980  });
2981  if (UseDFA)
2982  return DFAResources->reserveResources(MID);
2983 
2984  unsigned InsnClass = MID->getSchedClass();
2985  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
2986  if (!SCDesc->isValid()) {
2987  LLVM_DEBUG({
2988  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
2989  dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
2990  });
2991  return;
2992  }
2993  for (const MCWriteProcResEntry &PRE :
2994  make_range(STI->getWriteProcResBegin(SCDesc),
2995  STI->getWriteProcResEnd(SCDesc))) {
2996  if (!PRE.Cycles)
2997  continue;
2998  ++ProcResourceCount[PRE.ProcResourceIdx];
2999  LLVM_DEBUG({
3000  if (SwpDebugResource) {
3001  const MCProcResourceDesc *ProcResource =
3002  SM.getProcResource(PRE.ProcResourceIdx);
3003  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3004  ProcResource->Name, PRE.ProcResourceIdx,
3005  ProcResourceCount[PRE.ProcResourceIdx],
3006  ProcResource->NumUnits, PRE.Cycles);
3007  }
3008  });
3009  }
3010  LLVM_DEBUG({
3011  if (SwpDebugResource)
3012  dbgs() << "reserveResources: done!\n\n";
3013  });
3014 }
3015 
3016 bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
3017  return canReserveResources(&MI.getDesc());
3018 }
3019 
3020 void ResourceManager::reserveResources(const MachineInstr &MI) {
3021  return reserveResources(&MI.getDesc());
3022 }
3023 
3024 void ResourceManager::clearResources() {
3025  if (UseDFA)
3026  return DFAResources->clearResources();
3027  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3028 }
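// A reduced sketch (not part of this file) of the counter-based fallback used
// when no DFA packetizer is available: a reservation succeeds while the count
// for every written resource stays below its number of units, and the counts
// are cleared before each candidate cycle is examined. Class and method names
// are illustrative only.
#include <algorithm>
#include <utility>
#include <vector>
namespace sketch {
class CountingResources {
  std::vector<unsigned> Count, NumUnits;
public:
  explicit CountingResources(std::vector<unsigned> UnitsPerResource)
      : Count(UnitsPerResource.size(), 0),
        NumUnits(std::move(UnitsPerResource)) {}
  bool canReserve(unsigned Res) const { return Count[Res] < NumUnits[Res]; }
  void reserve(unsigned Res) { ++Count[Res]; }
  void clear() { std::fill(Count.begin(), Count.end(), 0); }
};
} // namespace sketch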
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
auto lower_bound(R &&Range, T &&Value) -> decltype(adl_begin(Range))
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1261
uint64_t CallInst * C
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:769
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
virtual void finishBlock()
Cleans up after scheduling in the given block.
mop_iterator operands_end()
Definition: MachineInstr.h:472
RegisterClassInfo RegClassInfo
A common definition of LaneBitmask for use in TableGen and CodeGen.
void clear()
Definition: MapVector.h:88
static bool pred_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi...
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:651
instr_iterator instr_end()
MachineBasicBlock * getMBB() const
unsigned NumPreds
of SDep::Data preds.
Definition: ScheduleDAG.h:266
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
bool isAllocatable(unsigned PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn&#39;t been...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A reimplementation of ModuloScheduleExpander.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
Definition: Compiler.h:476
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static constexpr LocationSize unknown()
Represents a schedule for a single-block loop.
bool empty() const
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
cl::opt< bool > SwpEnableCopyToPhi
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:63
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:178
bool isValidSchedule(SwingSchedulerDAG *SSD)
unsigned Reg
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
unsigned getSubReg() const
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:160
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
bool isRegSequence() const
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:37
STATISTIC(NumFunctions, "Total number of functions")
unsigned const TargetRegisterInfo * TRI
A debug info location.
Definition: DebugLoc.h:33
Metadata node.
Definition: Metadata.h:863
F(f)
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1068
block Block Frequency true
bool isPseudo() const
Return true if this is a pseudo instruction that doesn&#39;t correspond to a real machine instruction...
Definition: MCInstrDesc.h:257
static void getUnderlyingObjects(const MachineInstr *MI, SmallVectorImpl< const Value *> &Objs, const DataLayout &DL)
Return the underlying objects for the memory references of an instruction.
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated...
unsigned getReg() const
Returns the register associated with this edge.
Definition: ScheduleDAG.h:218
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:477
bool isPHI() const
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
SmallVectorImpl< SDep >::iterator succ_iterator
Definition: ScheduleDAG.h:260
SUnit * getNode(unsigned i) const
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
int compareRecMII(NodeSet &RHS)
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
A description of a memory reference used in the backend.
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout, ignoring any parts of the loop not contiguous with the part that contains the header.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it...
void closeBottom()
Set the boundary for the bottom of the region and summarize live outs.
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
const HexagonInstrInfo * TII
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
Modulo Software Pipelining
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change...
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1217
This file contains the simple types necessary to represent the attributes associated with functions a...
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
BlockT * getHeader() const
Definition: LoopInfo.h:105
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
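Because the machine IR is in SSA form at this point, each virtual register has at most one definition, so this accessor is commonly used to hop from a use back to its defining instruction. A small hypothetical sketch:
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
// Hypothetical helper: is the virtual register Reg defined by a PHI?
static bool isDefinedByPhi(const llvm::MachineRegisterInfo &MRI,
                           llvm::Register Reg) {
  if (const llvm::MachineInstr *DefMI = MRI.getVRegDef(Reg))
    return DefMI->isPHI();
  return false;
}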
defusechain_iterator - This class provides iterator support for machine operands in the function that use or define a specific register.
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:408
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is scheduled or the EndCycle is reached.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
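For context, this is the insert-and-iterate pattern the node-ordering code relies on. A minimal, hypothetical sketch (not taken from this file) showing that a SetVector rejects duplicates while preserving first-insertion order:
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
// Hypothetical helper: deduplicate Ids while keeping their first-seen order.
static llvm::SmallVector<int, 8> dedupInOrder(llvm::ArrayRef<int> Ids) {
  llvm::SetVector<int> Order;
  for (int Id : Ids)
    Order.insert(Id); // returns false (and does nothing) for duplicates
  return llvm::SmallVector<int, 8>(Order.begin(), Order.end());
}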
SlotIndexes pass.
Definition: SlotIndexes.h:314
void expand()
Performs the actual expansion.
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:858
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of iteration I-distance.
static bool succ_L(SetVector< SUnit *> &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
static bool isSubset(S1Ty &Set1, S2Ty &Set2)
Return true if Set1 is a subset of Set2.
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
void dump() const
Definition: Pass.cpp:134
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:244
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Itinerary data supplied by a subtarget to be used by a target.
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
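For readers unfamiliar with cl::opt, an option declared this way behaves like a plain value at runtime. A minimal, hypothetical sketch (the flag name is invented and is not one of the pipeliner's options):
#include "llvm/Support/CommandLine.h"
// Hypothetical flag, mirroring the declaration style shown above.
static llvm::cl::opt<bool>
    ExamplePipelinerFlag("example-pipeliner-flag", llvm::cl::Hidden,
                         llvm::cl::init(false),
                         llvm::cl::desc("Illustrative boolean option"));
// The option converts implicitly to its underlying type.
static bool exampleFlagEnabled() { return ExamplePipelinerFlag; }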
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.
void setReg(Register Reg)
Change the register this operand corresponds to.
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
unsigned count(SUnit *SU) const
bool isValid() const
Definition: MCSchedule.h:127
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator find(const KeyT &Key)
Definition: MapVector.h:147
bool insert(SUnit *SU)
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing Modulo Scheduling algorithm.
bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const
Return true if this load instruction never traps and points to a memory location whose value doesn't change during the execution of this function.
#define DEBUG_TYPE
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:210
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(adl_begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1231
virtual const InstrItineraryData * getInstrItineraryData() const
getInstrItineraryData - Returns instruction itinerary data for the target or specific subtarget...
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:78
const Value * getValue() const
Return the base address of the memory access.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
StringRef getString() const
Definition: Metadata.cpp:463
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Scheduling dependency.
Definition: ScheduleDAG.h:49
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:538
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
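As a hedged illustration of this builder interface (not code from the pipeliner; all names below are placeholders), creating and inserting a COPY typically looks like:
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
// Hypothetical helper: emit DstReg = COPY SrcReg before InsertPos in MBB.
static void emitCopyBefore(llvm::MachineBasicBlock &MBB,
                           llvm::MachineBasicBlock::iterator InsertPos,
                           const llvm::DebugLoc &DL,
                           const llvm::TargetInstrInfo *TII,
                           llvm::Register DstReg, llvm::Register SrcReg) {
  llvm::BuildMI(MBB, InsertPos, DL, TII->get(llvm::TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);
}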
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:596
void finishBlock() override
Clean up after the software pipeliner runs.
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:838
#define P(N)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
int getFirstCycle() const
Return the first cycle in the completed schedule.
void print(raw_ostream &os) const
Print the schedule information to the given output.
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instructions and return multiple objects.
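A hedged usage sketch under the LLVM 10-era signature shown above (the helper name is hypothetical): collect the candidate underlying objects of a pointer and check that each one is identifiable.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
// Hypothetical helper: do all underlying objects of Ptr look identifiable?
static bool allObjectsIdentified(const llvm::Value *Ptr,
                                 const llvm::DataLayout &DL) {
  llvm::SmallVector<const llvm::Value *, 4> Objs;
  llvm::GetUnderlyingObjects(Ptr, Objs, DL);
  return !Objs.empty() && llvm::all_of(Objs, [](const llvm::Value *O) {
    return llvm::isIdentifiedObject(O);
  });
}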
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
The main class in the implementation of the target independent software pipeliner pass...
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for correctness.
Definition: ScheduleDAG.h:200
MachineFunction * MF
unsigned const MachineRegisterInfo * MRI
void clearResources()
Reset the state.
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx...
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
Identify one of the processor resource kinds consumed by a particular scheduling class for the specified number of cycles.
Definition: MCSchedule.h:64
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different stages/cycles.
MachineInstrBuilder & UseMI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
static int64_t computeDelta(SectionEntry *A, SectionEntry *B)
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition: SmallSet.h:134
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
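For intuition, the stage and cycle accessors follow the standard modulo-scheduling arithmetic; a self-contained sketch of that relationship (illustrative, not this class's code):
// Given the cycle an instruction was scheduled at, the first cycle of the
// schedule, and the initiation interval II, the pipeline stage and the slot
// within the kernel are computed as below.
// Assumes Cycle >= FirstCycle and II > 0.
static void stageAndSlot(int Cycle, int FirstCycle, int II, int &Stage,
                         int &Slot) {
  Stage = (Cycle - FirstCycle) / II; // which overlapped iteration
  Slot = (Cycle - FirstCycle) % II;  // position within one kernel iteration
}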
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
bool hasOneMemOperand() const
Return true if this instruction has exactly one MachineMemOperand.
Definition: MachineInstr.h:567
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1172
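As a hedged example of this range-based wrapper (not code from this file; the predicate is illustrative):
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
// Hypothetical helper: does MI read any register operand?
static bool readsAnyRegister(const llvm::MachineInstr &MI) {
  return llvm::any_of(MI.operands(), [](const llvm::MachineOperand &MO) {
    return MO.isReg() && MO.isUse();
  });
}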
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Track the current register pressure at some position in the instruction stream, and remember the high water mark within the region traversed.
void setImm(int64_t immVal)
void dump() const
Utility function used for debugging to print the schedule.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
void cleanup()
Performs final cleanup after expansion.
const TargetInstrInfo * TII
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
void print(raw_ostream &os) const
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
void fixupRegisterOverlaps(std::deque< SUnit *> &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes to overlap.
LLVM_DUMP_METHOD void dump() const
bool isCopy() const
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1446
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e.g. it's a switch dispatch or isn't conditional).
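A minimal sketch of the usual calling pattern for this hook (illustrative only; the out-parameters are meaningful only when the call returns false):
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
// Hypothetical helper: true if the terminators of MBB could be analyzed.
static bool branchIsAnalyzable(const llvm::TargetInstrInfo *TII,
                               llvm::MachineBasicBlock &MBB) {
  llvm::MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  llvm::SmallVector<llvm::MachineOperand, 4> Cond;
  // analyzeBranch returns true when it could NOT understand the branch.
  return !TII->analyzeBranch(MBB, TBB, FBB, Cond);
}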
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
Any other ordering dependency.
Definition: ScheduleDAG.h:56
size_t size() const
Definition: SmallVector.h:52
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
const InstrItineraryData * InstrItins
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
hexagon gen pred
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
const unsigned MaxDepth
Representation for a specific memory location.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
SetVector< SUnit * >::const_iterator iterator
virtual bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
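A hedged sketch of how this hook is typically queried (the helper name is hypothetical); it fails gracefully when the target cannot decompose the address:
#include "llvm/CodeGen/TargetInstrInfo.h"
// Hypothetical helper: true if MI's address splits into a base operand and a
// constant byte offset.
static bool hasSimpleAddress(const llvm::TargetInstrInfo *TII,
                             const llvm::TargetRegisterInfo *TRI,
                             const llvm::MachineInstr &MI) {
  const llvm::MachineOperand *BaseOp = nullptr;
  int64_t Offset = 0;
  return TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI);
}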
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:297
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:519
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1146
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false...
Definition: SmallPtrSet.h:377
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:552
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the instructions using the SMS algorithm.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
MachineOperand class - Representation of each machine instruction operand.
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit the minimum initiation interval (MII) for pipelining.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
static unsigned getDepth(const SmallVectorImpl< const MachineBasicBlock *> &Stack, const MachineBasicBlock *MBB)
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
MachineInstrBuilder MachineInstrBuilder & DefMI
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
void setRecMII(unsigned mii)
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:374
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA=nullptr) const
Sometimes, it is possible for the target to tell, even without aliasing information, that two MIs access different memory addresses.
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
std::vector< int >::const_iterator const_iterator
Definition: ScheduleDAG.h:762
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
typename SuperClass::iterator iterator
Definition: SmallVector.h:319
This class represents the scheduled code.
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
bool isEmpty() const
Returns true if there are no itineraries.
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
TargetSubtargetInfo - Generic base class for all target subtargets.
void setColocate(unsigned c)
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
A ScheduleDAG for scheduling lists of MachineInstr.
Representation of each machine instruction.
Definition: MachineInstr.h:64
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:168
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const
Analyze the loop code, return true if it cannot be understood.
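A hedged usage sketch of this hook (the helper name is hypothetical), following the same returns-true-on-failure convention as analyzeBranch:
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
// Hypothetical helper: true if the target recognizes the loop's structure.
static bool loopIsAnalyzable(const llvm::TargetInstrInfo *TII,
                             llvm::MachineLoop &L) {
  llvm::MachineInstr *IndVar = nullptr, *Cmp = nullptr;
  return !TII->analyzeLoop(L, IndVar, Cmp);
}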
iterator begin() const
Definition: SmallPtrSet.h:396
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
These values represent a non-pipelined step in the execution of an instruction.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
int getFinalCycle() const
Return the last cycle in the finalized schedule.
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit *> &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:154
This file provides utility analysis objects describing memory locations.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
void setSubReg(unsigned subReg)
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form...
Generic base class for all target subtargets.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, RegPressureDelta &Delta, ArrayRef< PressureChange > CriticalPSets, ArrayRef< unsigned > MaxPressureLimit)
Consider the pressure increase caused by traversing this instruction bottom-up.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
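An illustrative use of dyn_cast (unrelated to this pass): it returns null instead of asserting when the dynamic type does not match.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"
// Hypothetical helper: the pointer operand if V is a load, otherwise null.
static const llvm::Value *loadPointerOrNull(const llvm::Value *V) {
  if (const auto *LD = llvm::dyn_cast<llvm::LoadInst>(V))
    return LD->getPointerOperand();
  return nullptr;
}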
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx=nullptr) const
Given the index of a register def operand, check if the register def is tied to a source operand...
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
iterator end() const
Definition: SmallPtrSet.h:401
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition: DenseMap.h:185
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, MachineBasicBlock::const_iterator pos, bool TrackLaneMasks, bool TrackUntiedDefs)
Setup the RegPressureTracker.
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
Definition: MachineInstr.h:825
Store the effects of a change in pressure on things that MI scheduler cares about.
iterator end()
Definition: MapVector.h:71
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next iteration.
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
LLVM Value Representation.
Definition: Value.h:73
mop_iterator operands_begin()
Definition: MachineInstr.h:471
A vector that has set insertion semantics.
Definition: SetVector.h:40
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y...
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore...
SmallVector< MachineOperand, 4 > BrCond
IRTranslator LLVM IR MI
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
A single uniqued string.
Definition: Metadata.h:603
void setPos(MachineBasicBlock::const_iterator Pos)
Register getReg() const
getReg - Returns the register number.
unsigned getPSet() const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
static bool computePath(SUnit *Cur, SetVector< SUnit *> &Path, SetVector< SUnit *> &DestNodes, SetVector< SUnit *> &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1074
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience functions that are often useful.
Definition: PriorityQueue.h:27
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the loop block.
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to the set.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
void resize(size_type N)
Definition: SmallVector.h:344
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431