LLVM  16.0.0git
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/BitVector.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/MapVector.h"
37 #include "llvm/ADT/PriorityQueue.h"
38 #include "llvm/ADT/SetOperations.h"
39 #include "llvm/ADT/SetVector.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/Statistic.h"
69 #include "llvm/Config/llvm-config.h"
70 #include "llvm/IR/Attributes.h"
71 #include "llvm/IR/Function.h"
72 #include "llvm/MC/LaneBitmask.h"
73 #include "llvm/MC/MCInstrDesc.h"
75 #include "llvm/MC/MCRegisterInfo.h"
76 #include "llvm/Pass.h"
78 #include "llvm/Support/Compiler.h"
79 #include "llvm/Support/Debug.h"
82 #include <algorithm>
83 #include <cassert>
84 #include <climits>
85 #include <cstdint>
86 #include <deque>
87 #include <functional>
88 #include <iomanip>
89 #include <iterator>
90 #include <map>
91 #include <memory>
92 #include <sstream>
93 #include <tuple>
94 #include <utility>
95 #include <vector>
96 
97 using namespace llvm;
98 
99 #define DEBUG_TYPE "pipeliner"
100 
101 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
102 STATISTIC(NumPipelined, "Number of loops software pipelined");
103 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
104 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
105 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
106 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
107 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
108 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
109 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
110 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
111 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
112 
113 /// A command line option to turn software pipelining on or off.
114 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
115  cl::desc("Enable Software Pipelining"));
116 
117 /// A command line option to enable SWP at -Os.
118 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
119  cl::desc("Enable SWP at Os."), cl::Hidden,
120  cl::init(false));
121 
122 /// A command line argument to limit minimum initial interval for pipelining.
123 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
124  cl::desc("Size limit for the MII."),
125  cl::Hidden, cl::init(27));
126 
127 /// A command line argument to force pipeliner to use specified initial
128 /// interval.
129 static cl::opt<int> SwpForceII("pipeliner-force-ii",
130  cl::desc("Force pipeliner to use specified II."),
131  cl::Hidden, cl::init(-1));
132 
133 /// A command line argument to limit the number of stages in the pipeline.
134 static cl::opt<int>
135  SwpMaxStages("pipeliner-max-stages",
136  cl::desc("Maximum stages allowed in the generated scheduled."),
137  cl::Hidden, cl::init(3));
138 
139 /// A command line option to disable the pruning of chain dependences due to
140 /// an unrelated Phi.
141 static cl::opt<bool>
142  SwpPruneDeps("pipeliner-prune-deps",
143  cl::desc("Prune dependences between unrelated Phi nodes."),
144  cl::Hidden, cl::init(true));
145 
146 /// A command line option to disable the pruning of loop carried order
147 /// dependences.
148 static cl::opt<bool>
149  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
150  cl::desc("Prune loop carried order dependences."),
151  cl::Hidden, cl::init(true));
152 
153 #ifndef NDEBUG
154 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
155 #endif
156 
157 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
159  cl::desc("Ignore RecMII"));
160 
161 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
162  cl::init(false));
163 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
164  cl::init(false));
165 
167  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
168  cl::desc("Instead of emitting the pipelined code, annotate instructions "
169  "with the generated schedule for feeding into the "
170  "-modulo-schedule-test pass"));
171 
173  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
174  cl::desc(
175  "Use the experimental peeling code generator for software pipelining"));
176 
177 namespace llvm {
178 
179 // A command line option to enable the CopyToPhi DAG mutation.
180 cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
181  cl::init(true),
182  cl::desc("Enable CopyToPhi DAG Mutation"));
183 
184 /// A command line argument to force pipeliner to use specified issue
185 /// width.
187  "pipeliner-force-issue-width",
188  cl::desc("Force pipeliner to use specified issue width."), cl::Hidden,
189  cl::init(-1));
190 
191 } // end namespace llvm
192 
193 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
194 char MachinePipeliner::ID = 0;
195 #ifndef NDEBUG
197 #endif
199 
201  "Modulo Software Pipelining", false, false)
207  "Modulo Software Pipelining", false, false)
208 
209 /// The "main" function for implementing Swing Modulo Scheduling.
210 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
211  if (skipFunction(mf.getFunction()))
212  return false;
213 
214  if (!EnableSWP)
215  return false;
216 
217  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
219  return false;
220 
221  if (!mf.getSubtarget().enableMachinePipeliner())
222  return false;
223 
224  // Cannot pipeline loops without instruction itineraries if we are using
225  // DFA for the pipeliner.
226  if (mf.getSubtarget().useDFAforSMS() &&
227  (!mf.getSubtarget().getInstrItineraryData() ||
228  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
229  return false;
230 
231  MF = &mf;
232  MLI = &getAnalysis<MachineLoopInfo>();
233  MDT = &getAnalysis<MachineDominatorTree>();
234  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
235  TII = MF->getSubtarget().getInstrInfo();
236  RegClassInfo.runOnMachineFunction(*MF);
237 
238  for (const auto &L : *MLI)
239  scheduleLoop(*L);
240 
241  return false;
242 }
243 
244 /// Attempt to perform the SMS algorithm on the specified loop. This function is
245 /// the main entry point for the algorithm. The function identifies candidate
246 /// loops, calculates the minimum initiation interval, and attempts to schedule
247 /// the loop.
248 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
249  bool Changed = false;
250  for (const auto &InnerLoop : L)
251  Changed |= scheduleLoop(*InnerLoop);
252 
253 #ifndef NDEBUG
254  // Stop trying after reaching the limit (if any).
255  int Limit = SwpLoopLimit;
256  if (Limit >= 0) {
257  if (NumTries >= SwpLoopLimit)
258  return Changed;
259  NumTries++;
260  }
261 #endif
262 
263  setPragmaPipelineOptions(L);
264  if (!canPipelineLoop(L)) {
265  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
266  ORE->emit([&]() {
267  return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
268  L.getStartLoc(), L.getHeader())
269  << "Failed to pipeline loop";
270  });
271 
272  LI.LoopPipelinerInfo.reset();
273  return Changed;
274  }
275 
276  ++NumTrytoPipeline;
277 
278  Changed = swingModuloScheduler(L);
279 
280  LI.LoopPipelinerInfo.reset();
281  return Changed;
282 }
283 
284 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
285  // Reset the pragma for the next loop in iteration.
286  disabledByPragma = false;
287  II_setByPragma = 0;
288 
289  MachineBasicBlock *LBLK = L.getTopBlock();
290 
291  if (LBLK == nullptr)
292  return;
293 
294  const BasicBlock *BBLK = LBLK->getBasicBlock();
295  if (BBLK == nullptr)
296  return;
297 
298  const Instruction *TI = BBLK->getTerminator();
299  if (TI == nullptr)
300  return;
301 
302  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
303  if (LoopID == nullptr)
304  return;
305 
306  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
307  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
308 
309  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
310  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
311 
312  if (MD == nullptr)
313  continue;
314 
315  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
316 
317  if (S == nullptr)
318  continue;
319 
320  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
321  assert(MD->getNumOperands() == 2 &&
322  "Pipeline initiation interval hint metadata should have two operands.");
324  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
325  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
326  } else if (S->getString() == "llvm.loop.pipeline.disable") {
327  disabledByPragma = true;
328  }
329  }
330 }
331 
332 /// Return true if the loop can be software pipelined. The algorithm is
333 /// restricted to loops with a single basic block. Make sure that the
334 /// branch in the loop can be analyzed.
335 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
336  if (L.getNumBlocks() != 1) {
337  ORE->emit([&]() {
338  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
339  L.getStartLoc(), L.getHeader())
340  << "Not a single basic block: "
341  << ore::NV("NumBlocks", L.getNumBlocks());
342  });
343  return false;
344  }
345 
346  if (disabledByPragma) {
347  ORE->emit([&]() {
348  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
349  L.getStartLoc(), L.getHeader())
350  << "Disabled by Pragma.";
351  });
352  return false;
353  }
354 
355  // Check if the branch can't be understood because we can't do pipelining
356  // if that's the case.
357  LI.TBB = nullptr;
358  LI.FBB = nullptr;
359  LI.BrCond.clear();
360  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
361  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
362  NumFailBranch++;
363  ORE->emit([&]() {
364  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
365  L.getStartLoc(), L.getHeader())
366  << "The branch can't be understood";
367  });
368  return false;
369  }
370 
371  LI.LoopInductionVar = nullptr;
372  LI.LoopCompare = nullptr;
374  if (!LI.LoopPipelinerInfo) {
375  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
376  NumFailLoop++;
377  ORE->emit([&]() {
378  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
379  L.getStartLoc(), L.getHeader())
380  << "The loop structure is not supported";
381  });
382  return false;
383  }
384 
385  if (!L.getLoopPreheader()) {
386  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
387  NumFailPreheader++;
388  ORE->emit([&]() {
389  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
390  L.getStartLoc(), L.getHeader())
391  << "No loop preheader found";
392  });
393  return false;
394  }
395 
396  // Remove any subregisters from inputs to phi nodes.
397  preprocessPhiNodes(*L.getHeader());
398  return true;
399 }
400 
401 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
403  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
404 
405  for (MachineInstr &PI : B.phis()) {
406  MachineOperand &DefOp = PI.getOperand(0);
407  assert(DefOp.getSubReg() == 0);
408  auto *RC = MRI.getRegClass(DefOp.getReg());
409 
410  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
411  MachineOperand &RegOp = PI.getOperand(i);
412  if (RegOp.getSubReg() == 0)
413  continue;
414 
415  // If the operand uses a subregister, replace it with a new register
416  // without subregisters, and generate a copy to the new register.
417  Register NewReg = MRI.createVirtualRegister(RC);
418  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
420  const DebugLoc &DL = PredB.findDebugLoc(At);
421  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
422  .addReg(RegOp.getReg(), getRegState(RegOp),
423  RegOp.getSubReg());
424  Slots.insertMachineInstrInMaps(*Copy);
425  RegOp.setReg(NewReg);
426  RegOp.setSubReg(0);
427  }
428  }
429 }
430 
431 /// The SMS algorithm consists of the following main steps:
432 /// 1. Computation and analysis of the dependence graph.
433 /// 2. Ordering of the nodes (instructions).
434 /// 3. Attempt to Schedule the loop.
435 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
436  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
437 
438  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
440 
442  // The kernel should not include any terminator instructions. These
443  // will be added back later.
444  SMS.startBlock(MBB);
445 
446  // Compute the number of 'real' instructions in the basic block by
447  // ignoring terminators.
448  unsigned size = MBB->size();
450  E = MBB->instr_end();
451  I != E; ++I, --size)
452  ;
453 
454  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
455  SMS.schedule();
456  SMS.exitRegion();
457 
458  SMS.finishBlock();
459  return SMS.hasNewSchedule();
460 }
461 
470 }
471 
472 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
473  if (SwpForceII > 0)
474  MII = SwpForceII;
475  else if (II_setByPragma > 0)
476  MII = II_setByPragma;
477  else
478  MII = std::max(ResMII, RecMII);
479 }
480 
481 void SwingSchedulerDAG::setMAX_II() {
482  if (SwpForceII > 0)
483  MAX_II = SwpForceII;
484  else if (II_setByPragma > 0)
485  MAX_II = II_setByPragma;
486  else
487  MAX_II = MII + 10;
488 }
489 
490 /// We override the schedule function in ScheduleDAGInstrs to implement the
491 /// scheduling part of the Swing Modulo Scheduling algorithm.
493  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
495  addLoopCarriedDependences(AA);
496  updatePhiDependences();
498  changeDependences();
499  postprocessDAG();
500  LLVM_DEBUG(dump());
501 
502  NodeSetType NodeSets;
503  findCircuits(NodeSets);
504  NodeSetType Circuits = NodeSets;
505 
506  // Calculate the MII.
507  unsigned ResMII = calculateResMII();
508  unsigned RecMII = calculateRecMII(NodeSets);
509 
510  fuseRecs(NodeSets);
511 
512  // This flag is used for testing and can cause correctness problems.
513  if (SwpIgnoreRecMII)
514  RecMII = 0;
515 
516  setMII(ResMII, RecMII);
517  setMAX_II();
518 
519  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
520  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
521 
522  // Can't schedule a loop without a valid MII.
523  if (MII == 0) {
524  LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
525  NumFailZeroMII++;
526  Pass.ORE->emit([&]() {
528  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
529  << "Invalid Minimal Initiation Interval: 0";
530  });
531  return;
532  }
533 
534  // Don't pipeline large loops.
535  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
536  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
537  << ", we don't pipeline large loops\n");
538  NumFailLargeMaxMII++;
539  Pass.ORE->emit([&]() {
541  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
542  << "Minimal Initiation Interval too large: "
543  << ore::NV("MII", (int)MII) << " > "
544  << ore::NV("SwpMaxMii", SwpMaxMii) << "."
545  << "Refer to -pipeliner-max-mii.";
546  });
547  return;
548  }
549 
550  computeNodeFunctions(NodeSets);
551 
552  registerPressureFilter(NodeSets);
553 
554  colocateNodeSets(NodeSets);
555 
556  checkNodeSets(NodeSets);
557 
558  LLVM_DEBUG({
559  for (auto &I : NodeSets) {
560  dbgs() << " Rec NodeSet ";
561  I.dump();
562  }
563  });
564 
565  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
566 
567  groupRemainingNodes(NodeSets);
568 
569  removeDuplicateNodes(NodeSets);
570 
571  LLVM_DEBUG({
572  for (auto &I : NodeSets) {
573  dbgs() << " NodeSet ";
574  I.dump();
575  }
576  });
577 
578  computeNodeOrder(NodeSets);
579 
580  // check for node order issues
581  checkValidNodeOrder(Circuits);
582 
583  SMSchedule Schedule(Pass.MF, this);
584  Scheduled = schedulePipeline(Schedule);
585 
586  if (!Scheduled){
587  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
588  NumFailNoSchedule++;
589  Pass.ORE->emit([&]() {
591  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
592  << "Unable to find schedule";
593  });
594  return;
595  }
596 
597  unsigned numStages = Schedule.getMaxStageCount();
598  // No need to generate pipeline if there are no overlapped iterations.
599  if (numStages == 0) {
600  LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
601  NumFailZeroStage++;
602  Pass.ORE->emit([&]() {
604  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
605  << "No need to pipeline - no overlapped iterations in schedule.";
606  });
607  return;
608  }
609  // Check that the maximum stage count is less than user-defined limit.
610  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
611  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
612  << " : too many stages, abort\n");
613  NumFailLargeMaxStage++;
614  Pass.ORE->emit([&]() {
616  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
617  << "Too many stages in schedule: "
618  << ore::NV("numStages", (int)numStages) << " > "
619  << ore::NV("SwpMaxStages", SwpMaxStages)
620  << ". Refer to -pipeliner-max-stages.";
621  });
622  return;
623  }
624 
625  Pass.ORE->emit([&]() {
627  Loop.getHeader())
628  << "Pipelined succesfully!";
629  });
630 
631  // Generate the schedule as a ModuloSchedule.
632  DenseMap<MachineInstr *, int> Cycles, Stages;
633  std::vector<MachineInstr *> OrderedInsts;
634  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
635  ++Cycle) {
636  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
637  OrderedInsts.push_back(SU->getInstr());
638  Cycles[SU->getInstr()] = Cycle;
639  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
640  }
641  }
643  for (auto &KV : NewMIs) {
644  Cycles[KV.first] = Cycles[KV.second];
645  Stages[KV.first] = Stages[KV.second];
646  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
647  }
648 
649  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
650  std::move(Stages));
651  if (EmitTestAnnotations) {
652  assert(NewInstrChanges.empty() &&
653  "Cannot serialize a schedule with InstrChanges!");
655  MSTI.annotate();
656  return;
657  }
658  // The experimental code generator can't work if there are InstChanges.
659  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
660  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
661  MSE.expand();
662  } else {
663  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
664  MSE.expand();
665  MSE.cleanup();
666  }
667  ++NumPipelined;
668 }
669 
670 /// Clean up after the software pipeliner runs.
672  for (auto &KV : NewMIs)
673  MF.deleteMachineInstr(KV.second);
674  NewMIs.clear();
675 
676  // Call the superclass.
678 }
679 
680 /// Return the register values for the operands of a Phi instruction.
681 /// This function assume the instruction is a Phi.
683  unsigned &InitVal, unsigned &LoopVal) {
684  assert(Phi.isPHI() && "Expecting a Phi.");
685 
686  InitVal = 0;
687  LoopVal = 0;
688  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
689  if (Phi.getOperand(i + 1).getMBB() != Loop)
690  InitVal = Phi.getOperand(i).getReg();
691  else
692  LoopVal = Phi.getOperand(i).getReg();
693 
694  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
695 }
696 
697 /// Return the Phi register value that comes the loop block.
698 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
699  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
700  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
701  return Phi.getOperand(i).getReg();
702  return 0;
703 }
704 
705 /// Return true if SUb can be reached from SUa following the chain edges.
706 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
707  SmallPtrSet<SUnit *, 8> Visited;
708  SmallVector<SUnit *, 8> Worklist;
709  Worklist.push_back(SUa);
710  while (!Worklist.empty()) {
711  const SUnit *SU = Worklist.pop_back_val();
712  for (const auto &SI : SU->Succs) {
713  SUnit *SuccSU = SI.getSUnit();
714  if (SI.getKind() == SDep::Order) {
715  if (Visited.count(SuccSU))
716  continue;
717  if (SuccSU == SUb)
718  return true;
719  Worklist.push_back(SuccSU);
720  Visited.insert(SuccSU);
721  }
722  }
723  }
724  return false;
725 }
726 
727 /// Return true if the instruction causes a chain between memory
728 /// references before and after it.
730  return MI.isCall() || MI.mayRaiseFPException() ||
731  MI.hasUnmodeledSideEffects() ||
732  (MI.hasOrderedMemoryRef() &&
733  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
734 }
735 
736 /// Return the underlying objects for the memory references of an instruction.
737 /// This function calls the code in ValueTracking, but first checks that the
738 /// instruction has a memory operand.
741  if (!MI->hasOneMemOperand())
742  return;
743  MachineMemOperand *MM = *MI->memoperands_begin();
744  if (!MM->getValue())
745  return;
746  getUnderlyingObjects(MM->getValue(), Objs);
747  for (const Value *V : Objs) {
748  if (!isIdentifiedObject(V)) {
749  Objs.clear();
750  return;
751  }
752  Objs.push_back(V);
753  }
754 }
755 
756 /// Add a chain edge between a load and store if the store can be an
757 /// alias of the load on a subsequent iteration, i.e., a loop carried
758 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
759 /// but that code doesn't create loop carried dependences.
760 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
764  for (auto &SU : SUnits) {
765  MachineInstr &MI = *SU.getInstr();
766  if (isDependenceBarrier(MI))
767  PendingLoads.clear();
768  else if (MI.mayLoad()) {
770  ::getUnderlyingObjects(&MI, Objs);
771  if (Objs.empty())
772  Objs.push_back(UnknownValue);
773  for (const auto *V : Objs) {
774  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
775  SUs.push_back(&SU);
776  }
777  } else if (MI.mayStore()) {
779  ::getUnderlyingObjects(&MI, Objs);
780  if (Objs.empty())
781  Objs.push_back(UnknownValue);
782  for (const auto *V : Objs) {
784  PendingLoads.find(V);
785  if (I == PendingLoads.end())
786  continue;
787  for (auto *Load : I->second) {
788  if (isSuccOrder(Load, &SU))
789  continue;
790  MachineInstr &LdMI = *Load->getInstr();
791  // First, perform the cheaper check that compares the base register.
792  // If they are the same and the load offset is less than the store
793  // offset, then mark the dependence as loop carried potentially.
794  const MachineOperand *BaseOp1, *BaseOp2;
795  int64_t Offset1, Offset2;
796  bool Offset1IsScalable, Offset2IsScalable;
797  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
798  Offset1IsScalable, TRI) &&
799  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
800  Offset2IsScalable, TRI)) {
801  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
802  Offset1IsScalable == Offset2IsScalable &&
803  (int)Offset1 < (int)Offset2) {
805  "What happened to the chain edge?");
806  SDep Dep(Load, SDep::Barrier);
807  Dep.setLatency(1);
808  SU.addPred(Dep);
809  continue;
810  }
811  }
812  // Second, the more expensive check that uses alias analysis on the
813  // base registers. If they alias, and the load offset is less than
814  // the store offset, the mark the dependence as loop carried.
815  if (!AA) {
816  SDep Dep(Load, SDep::Barrier);
817  Dep.setLatency(1);
818  SU.addPred(Dep);
819  continue;
820  }
821  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
822  MachineMemOperand *MMO2 = *MI.memoperands_begin();
823  if (!MMO1->getValue() || !MMO2->getValue()) {
824  SDep Dep(Load, SDep::Barrier);
825  Dep.setLatency(1);
826  SU.addPred(Dep);
827  continue;
828  }
829  if (MMO1->getValue() == MMO2->getValue() &&
830  MMO1->getOffset() <= MMO2->getOffset()) {
831  SDep Dep(Load, SDep::Barrier);
832  Dep.setLatency(1);
833  SU.addPred(Dep);
834  continue;
835  }
836  if (!AA->isNoAlias(
837  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
839  MMO2->getAAInfo()))) {
840  SDep Dep(Load, SDep::Barrier);
841  Dep.setLatency(1);
842  SU.addPred(Dep);
843  }
844  }
845  }
846  }
847  }
848 }
849 
850 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
851 /// processes dependences for PHIs. This function adds true dependences
852 /// from a PHI to a use, and a loop carried dependence from the use to the
853 /// PHI. The loop carried dependence is represented as an anti dependence
854 /// edge. This function also removes chain dependences between unrelated
855 /// PHIs.
856 void SwingSchedulerDAG::updatePhiDependences() {
857  SmallVector<SDep, 4> RemoveDeps;
859 
860  // Iterate over each DAG node.
861  for (SUnit &I : SUnits) {
862  RemoveDeps.clear();
863  // Set to true if the instruction has an operand defined by a Phi.
864  unsigned HasPhiUse = 0;
865  unsigned HasPhiDef = 0;
866  MachineInstr *MI = I.getInstr();
867  // Iterate over each operand, and we process the definitions.
868  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
869  MOE = MI->operands_end();
870  MOI != MOE; ++MOI) {
871  if (!MOI->isReg())
872  continue;
873  Register Reg = MOI->getReg();
874  if (MOI->isDef()) {
875  // If the register is used by a Phi, then create an anti dependence.
877  UI = MRI.use_instr_begin(Reg),
878  UE = MRI.use_instr_end();
879  UI != UE; ++UI) {
880  MachineInstr *UseMI = &*UI;
881  SUnit *SU = getSUnit(UseMI);
882  if (SU != nullptr && UseMI->isPHI()) {
883  if (!MI->isPHI()) {
884  SDep Dep(SU, SDep::Anti, Reg);
885  Dep.setLatency(1);
886  I.addPred(Dep);
887  } else {
888  HasPhiDef = Reg;
889  // Add a chain edge to a dependent Phi that isn't an existing
890  // predecessor.
891  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
892  I.addPred(SDep(SU, SDep::Barrier));
893  }
894  }
895  }
896  } else if (MOI->isUse()) {
897  // If the register is defined by a Phi, then create a true dependence.
899  if (DefMI == nullptr)
900  continue;
901  SUnit *SU = getSUnit(DefMI);
902  if (SU != nullptr && DefMI->isPHI()) {
903  if (!MI->isPHI()) {
904  SDep Dep(SU, SDep::Data, Reg);
905  Dep.setLatency(0);
906  ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
907  I.addPred(Dep);
908  } else {
909  HasPhiUse = Reg;
910  // Add a chain edge to a dependent Phi that isn't an existing
911  // predecessor.
912  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
913  I.addPred(SDep(SU, SDep::Barrier));
914  }
915  }
916  }
917  }
918  // Remove order dependences from an unrelated Phi.
919  if (!SwpPruneDeps)
920  continue;
921  for (auto &PI : I.Preds) {
922  MachineInstr *PMI = PI.getSUnit()->getInstr();
923  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
924  if (I.getInstr()->isPHI()) {
925  if (PMI->getOperand(0).getReg() == HasPhiUse)
926  continue;
927  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
928  continue;
929  }
930  RemoveDeps.push_back(PI);
931  }
932  }
933  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
934  I.removePred(RemoveDeps[i]);
935  }
936 }
937 
938 /// Iterate over each DAG node and see if we can change any dependences
939 /// in order to reduce the recurrence MII.
940 void SwingSchedulerDAG::changeDependences() {
941  // See if an instruction can use a value from the previous iteration.
942  // If so, we update the base and offset of the instruction and change
943  // the dependences.
944  for (SUnit &I : SUnits) {
945  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
946  int64_t NewOffset = 0;
947  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
948  NewOffset))
949  continue;
950 
951  // Get the MI and SUnit for the instruction that defines the original base.
952  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
953  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
954  if (!DefMI)
955  continue;
956  SUnit *DefSU = getSUnit(DefMI);
957  if (!DefSU)
958  continue;
959  // Get the MI and SUnit for the instruction that defins the new base.
960  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
961  if (!LastMI)
962  continue;
963  SUnit *LastSU = getSUnit(LastMI);
964  if (!LastSU)
965  continue;
966 
967  if (Topo.IsReachable(&I, LastSU))
968  continue;
969 
970  // Remove the dependence. The value now depends on a prior iteration.
972  for (const SDep &P : I.Preds)
973  if (P.getSUnit() == DefSU)
974  Deps.push_back(P);
975  for (int i = 0, e = Deps.size(); i != e; i++) {
976  Topo.RemovePred(&I, Deps[i].getSUnit());
977  I.removePred(Deps[i]);
978  }
979  // Remove the chain dependence between the instructions.
980  Deps.clear();
981  for (auto &P : LastSU->Preds)
982  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
983  Deps.push_back(P);
984  for (int i = 0, e = Deps.size(); i != e; i++) {
985  Topo.RemovePred(LastSU, Deps[i].getSUnit());
986  LastSU->removePred(Deps[i]);
987  }
988 
989  // Add a dependence between the new instruction and the instruction
990  // that defines the new base.
991  SDep Dep(&I, SDep::Anti, NewBase);
992  Topo.AddPred(LastSU, &I);
993  LastSU->addPred(Dep);
994 
995  // Remember the base and offset information so that we can update the
996  // instruction during code generation.
997  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
998  }
999 }
1000 
1001 namespace {
1002 
1003 // FuncUnitSorter - Comparison operator used to sort instructions by
1004 // the number of functional unit choices.
1005 struct FuncUnitSorter {
1006  const InstrItineraryData *InstrItins;
1007  const MCSubtargetInfo *STI;
1009 
1010  FuncUnitSorter(const TargetSubtargetInfo &TSI)
1011  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
1012 
1013  // Compute the number of functional unit alternatives needed
1014  // at each stage, and take the minimum value. We prioritize the
1015  // instructions by the least number of choices first.
1016  unsigned minFuncUnits(const MachineInstr *Inst,
1017  InstrStage::FuncUnits &F) const {
1018  unsigned SchedClass = Inst->getDesc().getSchedClass();
1019  unsigned min = UINT_MAX;
1020  if (InstrItins && !InstrItins->isEmpty()) {
1021  for (const InstrStage &IS :
1022  make_range(InstrItins->beginStage(SchedClass),
1023  InstrItins->endStage(SchedClass))) {
1024  InstrStage::FuncUnits funcUnits = IS.getUnits();
1025  unsigned numAlternatives = countPopulation(funcUnits);
1026  if (numAlternatives < min) {
1027  min = numAlternatives;
1028  F = funcUnits;
1029  }
1030  }
1031  return min;
1032  }
1033  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1034  const MCSchedClassDesc *SCDesc =
1035  STI->getSchedModel().getSchedClassDesc(SchedClass);
1036  if (!SCDesc->isValid())
1037  // No valid Schedule Class Desc for schedClass, should be
1038  // Pseudo/PostRAPseudo
1039  return min;
1040 
1041  for (const MCWriteProcResEntry &PRE :
1042  make_range(STI->getWriteProcResBegin(SCDesc),
1043  STI->getWriteProcResEnd(SCDesc))) {
1044  if (!PRE.Cycles)
1045  continue;
1046  const MCProcResourceDesc *ProcResource =
1047  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1048  unsigned NumUnits = ProcResource->NumUnits;
1049  if (NumUnits < min) {
1050  min = NumUnits;
1051  F = PRE.ProcResourceIdx;
1052  }
1053  }
1054  return min;
1055  }
1056  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1057  }
1058 
1059  // Compute the critical resources needed by the instruction. This
1060  // function records the functional units needed by instructions that
1061  // must use only one functional unit. We use this as a tie breaker
1062  // for computing the resource MII. The instrutions that require
1063  // the same, highly used, functional unit have high priority.
1064  void calcCriticalResources(MachineInstr &MI) {
1065  unsigned SchedClass = MI.getDesc().getSchedClass();
1066  if (InstrItins && !InstrItins->isEmpty()) {
1067  for (const InstrStage &IS :
1068  make_range(InstrItins->beginStage(SchedClass),
1069  InstrItins->endStage(SchedClass))) {
1070  InstrStage::FuncUnits FuncUnits = IS.getUnits();
1071  if (countPopulation(FuncUnits) == 1)
1072  Resources[FuncUnits]++;
1073  }
1074  return;
1075  }
1076  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1077  const MCSchedClassDesc *SCDesc =
1078  STI->getSchedModel().getSchedClassDesc(SchedClass);
1079  if (!SCDesc->isValid())
1080  // No valid Schedule Class Desc for schedClass, should be
1081  // Pseudo/PostRAPseudo
1082  return;
1083 
1084  for (const MCWriteProcResEntry &PRE :
1085  make_range(STI->getWriteProcResBegin(SCDesc),
1086  STI->getWriteProcResEnd(SCDesc))) {
1087  if (!PRE.Cycles)
1088  continue;
1089  Resources[PRE.ProcResourceIdx]++;
1090  }
1091  return;
1092  }
1093  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1094  }
1095 
1096  /// Return true if IS1 has less priority than IS2.
1097  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1098  InstrStage::FuncUnits F1 = 0, F2 = 0;
1099  unsigned MFUs1 = minFuncUnits(IS1, F1);
1100  unsigned MFUs2 = minFuncUnits(IS2, F2);
1101  if (MFUs1 == MFUs2)
1102  return Resources.lookup(F1) < Resources.lookup(F2);
1103  return MFUs1 > MFUs2;
1104  }
1105 };
1106 
1107 } // end anonymous namespace
1108 
1109 /// Calculate the resource constrained minimum initiation interval for the
1110 /// specified loop. We use the DFA to model the resources needed for
1111 /// each instruction, and we ignore dependences. A different DFA is created
1112 /// for each cycle that is required. When adding a new instruction, we attempt
1113 /// to add it to each existing DFA, until a legal space is found. If the
1114 /// instruction cannot be reserved in an existing DFA, we create a new one.
1115 unsigned SwingSchedulerDAG::calculateResMII() {
1116  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1117  ResourceManager RM(&MF.getSubtarget(), this);
1118  return RM.calculateResMII();
1119 }
1120 
1121 /// Calculate the recurrence-constrainted minimum initiation interval.
1122 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1123 /// for each circuit. The II needs to satisfy the inequality
1124 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1125 /// II that satisfies the inequality, and the RecMII is the maximum
1126 /// of those values.
1127 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1128  unsigned RecMII = 0;
1129 
1130  for (NodeSet &Nodes : NodeSets) {
1131  if (Nodes.empty())
1132  continue;
1133 
1134  unsigned Delay = Nodes.getLatency();
1135  unsigned Distance = 1;
1136 
1137  // ii = ceil(delay / distance)
1138  unsigned CurMII = (Delay + Distance - 1) / Distance;
1139  Nodes.setRecMII(CurMII);
1140  if (CurMII > RecMII)
1141  RecMII = CurMII;
1142  }
1143 
1144  return RecMII;
1145 }
1146 
1147 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1148 /// but we do this to find the circuits, and then change them back.
1149 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1151  for (SUnit &SU : SUnits) {
1152  for (SDep &Pred : SU.Preds)
1153  if (Pred.getKind() == SDep::Anti)
1154  DepsAdded.push_back(std::make_pair(&SU, Pred));
1155  }
1156  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1157  // Remove this anti dependency and add one in the reverse direction.
1158  SUnit *SU = P.first;
1159  SDep &D = P.second;
1160  SUnit *TargetSU = D.getSUnit();
1161  unsigned Reg = D.getReg();
1162  unsigned Lat = D.getLatency();
1163  SU->removePred(D);
1164  SDep Dep(SU, SDep::Anti, Reg);
1165  Dep.setLatency(Lat);
1166  TargetSU->addPred(Dep);
1167  }
1168 }
1169 
1170 /// Create the adjacency structure of the nodes in the graph.
1171 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1172  SwingSchedulerDAG *DAG) {
1173  BitVector Added(SUnits.size());
1174  DenseMap<int, int> OutputDeps;
1175  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1176  Added.reset();
1177  // Add any successor to the adjacency matrix and exclude duplicates.
1178  for (auto &SI : SUnits[i].Succs) {
1179  // Only create a back-edge on the first and last nodes of a dependence
1180  // chain. This records any chains and adds them later.
1181  if (SI.getKind() == SDep::Output) {
1182  int N = SI.getSUnit()->NodeNum;
1183  int BackEdge = i;
1184  auto Dep = OutputDeps.find(BackEdge);
1185  if (Dep != OutputDeps.end()) {
1186  BackEdge = Dep->second;
1187  OutputDeps.erase(Dep);
1188  }
1189  OutputDeps[N] = BackEdge;
1190  }
1191  // Do not process a boundary node, an artificial node.
1192  // A back-edge is processed only if it goes to a Phi.
1193  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1194  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1195  continue;
1196  int N = SI.getSUnit()->NodeNum;
1197  if (!Added.test(N)) {
1198  AdjK[i].push_back(N);
1199  Added.set(N);
1200  }
1201  }
1202  // A chain edge between a store and a load is treated as a back-edge in the
1203  // adjacency matrix.
1204  for (auto &PI : SUnits[i].Preds) {
1205  if (!SUnits[i].getInstr()->mayStore() ||
1206  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1207  continue;
1208  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1209  int N = PI.getSUnit()->NodeNum;
1210  if (!Added.test(N)) {
1211  AdjK[i].push_back(N);
1212  Added.set(N);
1213  }
1214  }
1215  }
1216  }
1217  // Add back-edges in the adjacency matrix for the output dependences.
1218  for (auto &OD : OutputDeps)
1219  if (!Added.test(OD.second)) {
1220  AdjK[OD.first].push_back(OD.second);
1221  Added.set(OD.second);
1222  }
1223 }
1224 
1225 /// Identify an elementary circuit in the dependence graph starting at the
1226 /// specified node.
1227 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1228  bool HasBackedge) {
1229  SUnit *SV = &SUnits[V];
1230  bool F = false;
1231  Stack.insert(SV);
1232  Blocked.set(V);
1233 
1234  for (auto W : AdjK[V]) {
1235  if (NumPaths > MaxPaths)
1236  break;
1237  if (W < S)
1238  continue;
1239  if (W == S) {
1240  if (!HasBackedge)
1241  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1242  F = true;
1243  ++NumPaths;
1244  break;
1245  } else if (!Blocked.test(W)) {
1246  if (circuit(W, S, NodeSets,
1247  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1248  F = true;
1249  }
1250  }
1251 
1252  if (F)
1253  unblock(V);
1254  else {
1255  for (auto W : AdjK[V]) {
1256  if (W < S)
1257  continue;
1258  B[W].insert(SV);
1259  }
1260  }
1261  Stack.pop_back();
1262  return F;
1263 }
1264 
1265 /// Unblock a node in the circuit finding algorithm.
1266 void SwingSchedulerDAG::Circuits::unblock(int U) {
1267  Blocked.reset(U);
1268  SmallPtrSet<SUnit *, 4> &BU = B[U];
1269  while (!BU.empty()) {
1271  assert(SI != BU.end() && "Invalid B set.");
1272  SUnit *W = *SI;
1273  BU.erase(W);
1274  if (Blocked.test(W->NodeNum))
1275  unblock(W->NodeNum);
1276  }
1277 }
1278 
1279 /// Identify all the elementary circuits in the dependence graph using
1280 /// Johnson's circuit algorithm.
1281 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1282  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1283  // but we do this to find the circuits, and then change them back.
1285 
1286  Circuits Cir(SUnits, Topo);
1287  // Create the adjacency structure.
1288  Cir.createAdjacencyStructure(this);
1289  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1290  Cir.reset();
1291  Cir.circuit(i, i, NodeSets);
1292  }
1293 
1294  // Change the dependences back so that we've created a DAG again.
1296 }
1297 
1298 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1299 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1300 // additional copies that are needed across iterations. An artificial dependence
1301 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1302 
1303 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1304 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1305 // PHI-------True-Dep------> USEOfPhi
1306 
1307 // The mutation creates
1308 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1309 
1310 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1311 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1312 // late to avoid additional copies across iterations. The possible scheduling
1313 // order would be
1314 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1315 
1317  for (SUnit &SU : DAG->SUnits) {
1318  // Find the COPY/REG_SEQUENCE instruction.
1319  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1320  continue;
1321 
1322  // Record the loop carried PHIs.
1323  SmallVector<SUnit *, 4> PHISUs;
1324  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1325  SmallVector<SUnit *, 4> SrcSUs;
1326 
1327  for (auto &Dep : SU.Preds) {
1328  SUnit *TmpSU = Dep.getSUnit();
1329  MachineInstr *TmpMI = TmpSU->getInstr();
1330  SDep::Kind DepKind = Dep.getKind();
1331  // Save the loop carried PHI.
1332  if (DepKind == SDep::Anti && TmpMI->isPHI())
1333  PHISUs.push_back(TmpSU);
1334  // Save the source of COPY/REG_SEQUENCE.
1335  // If the source has no pre-decessors, we will end up creating cycles.
1336  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1337  SrcSUs.push_back(TmpSU);
1338  }
1339 
1340  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1341  continue;
1342 
1343  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1344  // SUnit to the container.
1345  SmallVector<SUnit *, 8> UseSUs;
1346  // Do not use iterator based loop here as we are updating the container.
1347  for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1348  for (auto &Dep : PHISUs[Index]->Succs) {
1349  if (Dep.getKind() != SDep::Data)
1350  continue;
1351 
1352  SUnit *TmpSU = Dep.getSUnit();
1353  MachineInstr *TmpMI = TmpSU->getInstr();
1354  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1355  PHISUs.push_back(TmpSU);
1356  continue;
1357  }
1358  UseSUs.push_back(TmpSU);
1359  }
1360  }
1361 
1362  if (UseSUs.size() == 0)
1363  continue;
1364 
1365  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1366  // Add the artificial dependencies if it does not form a cycle.
1367  for (auto *I : UseSUs) {
1368  for (auto *Src : SrcSUs) {
1369  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1370  Src->addPred(SDep(I, SDep::Artificial));
1371  SDAG->Topo.AddPred(Src, I);
1372  }
1373  }
1374  }
1375  }
1376 }
1377 
1378 /// Return true for DAG nodes that we ignore when computing the cost functions.
1379 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1380 /// in the calculation of the ASAP, ALAP, etc functions.
1381 static bool ignoreDependence(const SDep &D, bool isPred) {
1382  if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
1383  return true;
1384  return D.getKind() == SDep::Anti && isPred;
1385 }
1386 
1387 /// Compute several functions need to order the nodes for scheduling.
1388 /// ASAP - Earliest time to schedule a node.
1389 /// ALAP - Latest time to schedule a node.
1390 /// MOV - Mobility function, difference between ALAP and ASAP.
1391 /// D - Depth of each node.
1392 /// H - Height of each node.
1393 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1394  ScheduleInfo.resize(SUnits.size());
1395 
1396  LLVM_DEBUG({
1397  for (int I : Topo) {
1398  const SUnit &SU = SUnits[I];
1399  dumpNode(SU);
1400  }
1401  });
1402 
1403  int maxASAP = 0;
1404  // Compute ASAP and ZeroLatencyDepth.
1405  for (int I : Topo) {
1406  int asap = 0;
1407  int zeroLatencyDepth = 0;
1408  SUnit *SU = &SUnits[I];
1409  for (const SDep &P : SU->Preds) {
1410  SUnit *pred = P.getSUnit();
1411  if (P.getLatency() == 0)
1412  zeroLatencyDepth =
1413  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1414  if (ignoreDependence(P, true))
1415  continue;
1416  asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
1417  getDistance(pred, SU, P) * MII));
1418  }
1419  maxASAP = std::max(maxASAP, asap);
1420  ScheduleInfo[I].ASAP = asap;
1421  ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1422  }
1423 
1424  // Compute ALAP, ZeroLatencyHeight, and MOV.
1425  for (int I : llvm::reverse(Topo)) {
1426  int alap = maxASAP;
1427  int zeroLatencyHeight = 0;
1428  SUnit *SU = &SUnits[I];
1429  for (const SDep &S : SU->Succs) {
1430  SUnit *succ = S.getSUnit();
1431  if (succ->isBoundaryNode())
1432  continue;
1433  if (S.getLatency() == 0)
1434  zeroLatencyHeight =
1435  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1436  if (ignoreDependence(S, true))
1437  continue;
1438  alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
1439  getDistance(SU, succ, S) * MII));
1440  }
1441 
1442  ScheduleInfo[I].ALAP = alap;
1443  ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
1444  }
1445 
1446  // After computing the node functions, compute the summary for each node set.
1447  for (NodeSet &I : NodeSets)
1448  I.computeNodeSetInfo(this);
1449 
1450  LLVM_DEBUG({
1451  for (unsigned i = 0; i < SUnits.size(); i++) {
1452  dbgs() << "\tNode " << i << ":\n";
1453  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1454  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1455  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1456  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1457  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1458  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1459  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1460  }
1461  });
1462 }
1463 
1464 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1465 /// as the predecessors of the elements of NodeOrder that are not also in
1466 /// NodeOrder.
1469  const NodeSet *S = nullptr) {
1470  Preds.clear();
1471  for (const SUnit *SU : NodeOrder) {
1472  for (const SDep &Pred : SU->Preds) {
1473  if (S && S->count(Pred.getSUnit()) == 0)
1474  continue;
1475  if (ignoreDependence(Pred, true))
1476  continue;
1477  if (NodeOrder.count(Pred.getSUnit()) == 0)
1478  Preds.insert(Pred.getSUnit());
1479  }
1480  // Back-edges are predecessors with an anti-dependence.
1481  for (const SDep &Succ : SU->Succs) {
1482  if (Succ.getKind() != SDep::Anti)
1483  continue;
1484  if (S && S->count(Succ.getSUnit()) == 0)
1485  continue;
1486  if (NodeOrder.count(Succ.getSUnit()) == 0)
1487  Preds.insert(Succ.getSUnit());
1488  }
1489  }
1490  return !Preds.empty();
1491 }
1492 
1493 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1494 /// as the successors of the elements of NodeOrder that are not also in
1495 /// NodeOrder.
1498  const NodeSet *S = nullptr) {
1499  Succs.clear();
1500  for (const SUnit *SU : NodeOrder) {
1501  for (const SDep &Succ : SU->Succs) {
1502  if (S && S->count(Succ.getSUnit()) == 0)
1503  continue;
1504  if (ignoreDependence(Succ, false))
1505  continue;
1506  if (NodeOrder.count(Succ.getSUnit()) == 0)
1507  Succs.insert(Succ.getSUnit());
1508  }
1509  for (const SDep &Pred : SU->Preds) {
1510  if (Pred.getKind() != SDep::Anti)
1511  continue;
1512  if (S && S->count(Pred.getSUnit()) == 0)
1513  continue;
1514  if (NodeOrder.count(Pred.getSUnit()) == 0)
1515  Succs.insert(Pred.getSUnit());
1516  }
1517  }
1518  return !Succs.empty();
1519 }
1520 
1521 /// Return true if there is a path from the specified node to any of the nodes
1522 /// in DestNodes. Keep track and return the nodes in any path.
1523 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1524  SetVector<SUnit *> &DestNodes,
1525  SetVector<SUnit *> &Exclude,
1526  SmallPtrSet<SUnit *, 8> &Visited) {
1527  if (Cur->isBoundaryNode())
1528  return false;
1529  if (Exclude.contains(Cur))
1530  return false;
1531  if (DestNodes.contains(Cur))
1532  return true;
1533  if (!Visited.insert(Cur).second)
1534  return Path.contains(Cur);
1535  bool FoundPath = false;
1536  for (auto &SI : Cur->Succs)
1537  if (!ignoreDependence(SI, false))
1538  FoundPath |=
1539  computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1540  for (auto &PI : Cur->Preds)
1541  if (PI.getKind() == SDep::Anti)
1542  FoundPath |=
1543  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1544  if (FoundPath)
1545  Path.insert(Cur);
1546  return FoundPath;
1547 }
1548 
1549 /// Compute the live-out registers for the instructions in a node-set.
1550 /// The live-out registers are those that are defined in the node-set,
1551 /// but not used. Except for use operands of Phis.
1553  NodeSet &NS) {
1558  for (SUnit *SU : NS) {
1559  const MachineInstr *MI = SU->getInstr();
1560  if (MI->isPHI())
1561  continue;
1562  for (const MachineOperand &MO : MI->operands())
1563  if (MO.isReg() && MO.isUse()) {
1564  Register Reg = MO.getReg();
1566  Uses.insert(Reg);
1567  else if (MRI.isAllocatable(Reg))
1568  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1569  ++Units)
1570  Uses.insert(*Units);
1571  }
1572  }
1573  for (SUnit *SU : NS)
1574  for (const MachineOperand &MO : SU->getInstr()->operands())
1575  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1576  Register Reg = MO.getReg();
1578  if (!Uses.count(Reg))
1579  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1581  } else if (MRI.isAllocatable(Reg)) {
1582  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1583  ++Units)
1584  if (!Uses.count(*Units))
1585  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1587  }
1588  }
1589  RPTracker.addLiveRegs(LiveOutRegs);
1590 }
1591 
1592 /// A heuristic to filter nodes in recurrent node-sets if the register
1593 /// pressure of a set is too high.
1594 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1595  for (auto &NS : NodeSets) {
1596  // Skip small node-sets since they won't cause register pressure problems.
1597  if (NS.size() <= 2)
1598  continue;
1599  IntervalPressure RecRegPressure;
1600  RegPressureTracker RecRPTracker(RecRegPressure);
1601  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1602  computeLiveOuts(MF, RecRPTracker, NS);
1603  RecRPTracker.closeBottom();
1604 
1605  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1606  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1607  return A->NodeNum > B->NodeNum;
1608  });
1609 
1610  for (auto &SU : SUnits) {
1611  // Since we're computing the register pressure for a subset of the
1612  // instructions in a block, we need to set the tracker for each
1613  // instruction in the node-set. The tracker is set to the instruction
1614  // just after the one we're interested in.
1616  RecRPTracker.setPos(std::next(CurInstI));
1617 
1618  RegPressureDelta RPDelta;
1619  ArrayRef<PressureChange> CriticalPSets;
1620  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1621  CriticalPSets,
1622  RecRegPressure.MaxSetPressure);
1623  if (RPDelta.Excess.isValid()) {
1624  LLVM_DEBUG(
1625  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1626  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1627  << ":" << RPDelta.Excess.getUnitInc() << "\n");
1628  NS.setExceedPressure(SU);
1629  break;
1630  }
1631  RecRPTracker.recede();
1632  }
1633  }
1634 }
1635 
1636 /// A heuristic to colocate node sets that have the same set of
1637 /// successors.
1638 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1639  unsigned Colocate = 0;
1640  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1641  NodeSet &N1 = NodeSets[i];
1643  if (N1.empty() || !succ_L(N1, S1))
1644  continue;
1645  for (int j = i + 1; j < e; ++j) {
1646  NodeSet &N2 = NodeSets[j];
1647  if (N1.compareRecMII(N2) != 0)
1648  continue;
1650  if (N2.empty() || !succ_L(N2, S2))
1651  continue;
1652  if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
1653  N1.setColocate(++Colocate);
1654  N2.setColocate(Colocate);
1655  break;
1656  }
1657  }
1658  }
1659 }
1660 
1661 /// Check if the existing node-sets are profitable. If not, then ignore the
1662 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1663 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1664 /// then it's best to try to schedule all instructions together instead of
1665 /// starting with the recurrent node-sets.
1666 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1667  // Look for loops with a large MII.
1668  if (MII < 17)
1669  return;
1670  // Check if the node-set contains only a simple add recurrence.
1671  for (auto &NS : NodeSets) {
1672  if (NS.getRecMII() > 2)
1673  return;
1674  if (NS.getMaxDepth() > MII)
1675  return;
1676  }
1677  NodeSets.clear();
1678  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1679 }
1680 
1681 /// Add the nodes that do not belong to a recurrence set into groups
1682 /// based upon connected components.
1683 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1684  SetVector<SUnit *> NodesAdded;
1685  SmallPtrSet<SUnit *, 8> Visited;
1686  // Add the nodes that are on a path between the previous node sets and
1687  // the current node set.
1688  for (NodeSet &I : NodeSets) {
1690  // Add the nodes from the current node set to the previous node set.
1691  if (succ_L(I, N)) {
1693  for (SUnit *NI : N) {
1694  Visited.clear();
1695  computePath(NI, Path, NodesAdded, I, Visited);
1696  }
1697  if (!Path.empty())
1698  I.insert(Path.begin(), Path.end());
1699  }
1700  // Add the nodes from the previous node set to the current node set.
1701  N.clear();
1702  if (succ_L(NodesAdded, N)) {
1704  for (SUnit *NI : N) {
1705  Visited.clear();
1706  computePath(NI, Path, I, NodesAdded, Visited);
1707  }
1708  if (!Path.empty())
1709  I.insert(Path.begin(), Path.end());
1710  }
1711  NodesAdded.insert(I.begin(), I.end());
1712  }
1713 
1714  // Create a new node set with the connected nodes of any successor of a node
1715  // in a recurrent set.
1716  NodeSet NewSet;
1718  if (succ_L(NodesAdded, N))
1719  for (SUnit *I : N)
1720  addConnectedNodes(I, NewSet, NodesAdded);
1721  if (!NewSet.empty())
1722  NodeSets.push_back(NewSet);
1723 
1724  // Create a new node set with the connected nodes of any predecessor of a node
1725  // in a recurrent set.
1726  NewSet.clear();
1727  if (pred_L(NodesAdded, N))
1728  for (SUnit *I : N)
1729  addConnectedNodes(I, NewSet, NodesAdded);
1730  if (!NewSet.empty())
1731  NodeSets.push_back(NewSet);
1732 
1733  // Create new nodes sets with the connected nodes any remaining node that
1734  // has no predecessor.
1735  for (SUnit &SU : SUnits) {
1736  if (NodesAdded.count(&SU) == 0) {
1737  NewSet.clear();
1738  addConnectedNodes(&SU, NewSet, NodesAdded);
1739  if (!NewSet.empty())
1740  NodeSets.push_back(NewSet);
1741  }
1742  }
1743 }
1744 
1745 /// Add the node to the set, and add all of its connected nodes to the set.
1746 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1747  SetVector<SUnit *> &NodesAdded) {
1748  NewSet.insert(SU);
1749  NodesAdded.insert(SU);
1750  for (auto &SI : SU->Succs) {
1751  SUnit *Successor = SI.getSUnit();
1752  if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
1753  NodesAdded.count(Successor) == 0)
1754  addConnectedNodes(Successor, NewSet, NodesAdded);
1755  }
1756  for (auto &PI : SU->Preds) {
1757  SUnit *Predecessor = PI.getSUnit();
1758  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1759  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1760  }
1761 }
1762 
1763 /// Return true if Set1 contains elements in Set2. The elements in common
1764 /// are returned in a different container.
1765 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1766  SmallSetVector<SUnit *, 8> &Result) {
1767  Result.clear();
1768  for (SUnit *SU : Set1) {
1769  if (Set2.count(SU) != 0)
1770  Result.insert(SU);
1771  }
1772  return !Result.empty();
1773 }
1774 
1775 /// Merge the recurrence node sets that have the same initial node.
1776 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1777  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1778  ++I) {
1779  NodeSet &NI = *I;
1780  for (NodeSetType::iterator J = I + 1; J != E;) {
1781  NodeSet &NJ = *J;
1782  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1783  if (NJ.compareRecMII(NI) > 0)
1784  NI.setRecMII(NJ.getRecMII());
1785  for (SUnit *SU : *J)
1786  I->insert(SU);
1787  NodeSets.erase(J);
1788  E = NodeSets.end();
1789  } else {
1790  ++J;
1791  }
1792  }
1793  }
1794 }
1795 
1796 /// Remove nodes that have been scheduled in previous NodeSets.
1797 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1798  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1799  ++I)
1800  for (NodeSetType::iterator J = I + 1; J != E;) {
1801  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1802 
1803  if (J->empty()) {
1804  NodeSets.erase(J);
1805  E = NodeSets.end();
1806  } else {
1807  ++J;
1808  }
1809  }
1810 }
1811 
1812 /// Compute an ordered list of the dependence graph nodes, which
1813 /// indicates the order that the nodes will be scheduled. This is a
1814 /// two-level algorithm. First, a partial order is created, which
1815 /// consists of a list of sets ordered from highest to lowest priority.
1816 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1818  NodeOrder.clear();
1819 
1820  for (auto &Nodes : NodeSets) {
1821  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1822  OrderKind Order;
1824  if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1825  R.insert(N.begin(), N.end());
1826  Order = BottomUp;
1827  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1828  } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1829  R.insert(N.begin(), N.end());
1830  Order = TopDown;
1831  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1832  } else if (isIntersect(N, Nodes, R)) {
1833  // If some of the successors are in the existing node-set, then use the
1834  // top-down ordering.
1835  Order = TopDown;
1836  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1837  } else if (NodeSets.size() == 1) {
1838  for (const auto &N : Nodes)
1839  if (N->Succs.size() == 0)
1840  R.insert(N);
1841  Order = BottomUp;
1842  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1843  } else {
1844  // Find the node with the highest ASAP.
1845  SUnit *maxASAP = nullptr;
1846  for (SUnit *SU : Nodes) {
1847  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1848  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1849  maxASAP = SU;
1850  }
1851  R.insert(maxASAP);
1852  Order = BottomUp;
1853  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1854  }
1855 
1856  while (!R.empty()) {
1857  if (Order == TopDown) {
1858  // Choose the node with the maximum height. If more than one, choose
1859  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1860  // choose the node with the lowest MOV.
1861  while (!R.empty()) {
1862  SUnit *maxHeight = nullptr;
1863  for (SUnit *I : R) {
1864  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1865  maxHeight = I;
1866  else if (getHeight(I) == getHeight(maxHeight) &&
1868  maxHeight = I;
1869  else if (getHeight(I) == getHeight(maxHeight) &&
1871  getZeroLatencyHeight(maxHeight) &&
1872  getMOV(I) < getMOV(maxHeight))
1873  maxHeight = I;
1874  }
1875  NodeOrder.insert(maxHeight);
1876  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1877  R.remove(maxHeight);
1878  for (const auto &I : maxHeight->Succs) {
1879  if (Nodes.count(I.getSUnit()) == 0)
1880  continue;
1881  if (NodeOrder.contains(I.getSUnit()))
1882  continue;
1883  if (ignoreDependence(I, false))
1884  continue;
1885  R.insert(I.getSUnit());
1886  }
1887  // Back-edges are predecessors with an anti-dependence.
1888  for (const auto &I : maxHeight->Preds) {
1889  if (I.getKind() != SDep::Anti)
1890  continue;
1891  if (Nodes.count(I.getSUnit()) == 0)
1892  continue;
1893  if (NodeOrder.contains(I.getSUnit()))
1894  continue;
1895  R.insert(I.getSUnit());
1896  }
1897  }
1898  Order = BottomUp;
1899  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1901  if (pred_L(NodeOrder, N, &Nodes))
1902  R.insert(N.begin(), N.end());
1903  } else {
1904  // Choose the node with the maximum depth. If more than one, choose
1905  // the node with the maximum ZeroLatencyDepth. If still more than one,
1906  // choose the node with the lowest MOV.
1907  while (!R.empty()) {
1908  SUnit *maxDepth = nullptr;
1909  for (SUnit *I : R) {
1910  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1911  maxDepth = I;
1912  else if (getDepth(I) == getDepth(maxDepth) &&
1914  maxDepth = I;
1915  else if (getDepth(I) == getDepth(maxDepth) &&
1916  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1917  getMOV(I) < getMOV(maxDepth))
1918  maxDepth = I;
1919  }
1920  NodeOrder.insert(maxDepth);
1921  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1922  R.remove(maxDepth);
1923  if (Nodes.isExceedSU(maxDepth)) {
1924  Order = TopDown;
1925  R.clear();
1926  R.insert(Nodes.getNode(0));
1927  break;
1928  }
1929  for (const auto &I : maxDepth->Preds) {
1930  if (Nodes.count(I.getSUnit()) == 0)
1931  continue;
1932  if (NodeOrder.contains(I.getSUnit()))
1933  continue;
1934  R.insert(I.getSUnit());
1935  }
1936  // Back-edges are predecessors with an anti-dependence.
1937  for (const auto &I : maxDepth->Succs) {
1938  if (I.getKind() != SDep::Anti)
1939  continue;
1940  if (Nodes.count(I.getSUnit()) == 0)
1941  continue;
1942  if (NodeOrder.contains(I.getSUnit()))
1943  continue;
1944  R.insert(I.getSUnit());
1945  }
1946  }
1947  Order = TopDown;
1948  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1950  if (succ_L(NodeOrder, N, &Nodes))
1951  R.insert(N.begin(), N.end());
1952  }
1953  }
1954  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1955  }
1956 
1957  LLVM_DEBUG({
1958  dbgs() << "Node order: ";
1959  for (SUnit *I : NodeOrder)
1960  dbgs() << " " << I->NodeNum << " ";
1961  dbgs() << "\n";
1962  });
1963 }
1964 
1965 /// Process the nodes in the computed order and create the pipelined schedule
1966 /// of the instructions, if possible. Return true if a schedule is found.
1967 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
1968 
1969  if (NodeOrder.empty()){
1970  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
1971  return false;
1972  }
1973 
1974  bool scheduleFound = false;
1975  // Keep increasing II until a valid schedule is found.
1976  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
1977  Schedule.reset();
1978  Schedule.setInitiationInterval(II);
1979  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
1980 
1981  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
1982  SetVector<SUnit *>::iterator NE = NodeOrder.end();
1983  do {
1984  SUnit *SU = *NI;
1985 
1986  // Compute the schedule time for the instruction, which is based
1987  // upon the scheduled time for any predecessors/successors.
1988  int EarlyStart = INT_MIN;
1989  int LateStart = INT_MAX;
1990  // These values are set when the size of the schedule window is limited
1991  // due to chain dependences.
1992  int SchedEnd = INT_MAX;
1993  int SchedStart = INT_MIN;
1994  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
1995  II, this);
1996  LLVM_DEBUG({
1997  dbgs() << "\n";
1998  dbgs() << "Inst (" << SU->NodeNum << ") ";
1999  SU->getInstr()->dump();
2000  dbgs() << "\n";
2001  });
2002  LLVM_DEBUG({
2003  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2004  LateStart, SchedEnd, SchedStart);
2005  });
2006 
2007  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2008  SchedStart > LateStart)
2009  scheduleFound = false;
2010  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2011  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2012  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2013  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2014  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2015  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2016  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2017  SchedEnd =
2018  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2019  // When scheduling a Phi it is better to start at the late cycle and go
2020  // backwards. The default order may insert the Phi too far away from
2021  // its first dependence.
2022  if (SU->getInstr()->isPHI())
2023  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2024  else
2025  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2026  } else {
2027  int FirstCycle = Schedule.getFirstCycle();
2028  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2029  FirstCycle + getASAP(SU) + II - 1, II);
2030  }
2031  // Even if we find a schedule, make sure the schedule doesn't exceed the
2032  // allowable number of stages. We keep trying if this happens.
2033  if (scheduleFound)
2034  if (SwpMaxStages > -1 &&
2035  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2036  scheduleFound = false;
2037 
2038  LLVM_DEBUG({
2039  if (!scheduleFound)
2040  dbgs() << "\tCan't schedule\n";
2041  });
2042  } while (++NI != NE && scheduleFound);
2043 
2044  // If a schedule is found, ensure non-pipelined instructions are in stage 0
2045  if (scheduleFound)
2046  scheduleFound =
2047  Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
2048 
2049  // If a schedule is found, check if it is a valid schedule too.
2050  if (scheduleFound)
2051  scheduleFound = Schedule.isValidSchedule(this);
2052  }
2053 
2054  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2055  << " (II=" << Schedule.getInitiationInterval()
2056  << ")\n");
2057 
2058  if (scheduleFound) {
2059  scheduleFound = LoopPipelinerInfo->shouldUseSchedule(*this, Schedule);
2060  if (!scheduleFound)
2061  LLVM_DEBUG(dbgs() << "Target rejected schedule\n");
2062  }
2063 
2064  if (scheduleFound) {
2065  Schedule.finalizeSchedule(this);
2066  Pass.ORE->emit([&]() {
2068  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2069  << "Schedule found with Initiation Interval: "
2070  << ore::NV("II", Schedule.getInitiationInterval())
2071  << ", MaxStageCount: "
2072  << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
2073  });
2074  } else
2075  Schedule.reset();
2076 
2077  return scheduleFound && Schedule.getMaxStageCount() > 0;
2078 }
2079 
2080 /// Return true if we can compute the amount the instruction changes
2081 /// during each iteration. Set Delta to the amount of the change.
2082 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2084  const MachineOperand *BaseOp;
2085  int64_t Offset;
2086  bool OffsetIsScalable;
2087  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2088  return false;
2089 
2090  // FIXME: This algorithm assumes instructions have fixed-size offsets.
2091  if (OffsetIsScalable)
2092  return false;
2093 
2094  if (!BaseOp->isReg())
2095  return false;
2096 
2097  Register BaseReg = BaseOp->getReg();
2098 
2100  // Check if there is a Phi. If so, get the definition in the loop.
2101  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2102  if (BaseDef && BaseDef->isPHI()) {
2103  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2104  BaseDef = MRI.getVRegDef(BaseReg);
2105  }
2106  if (!BaseDef)
2107  return false;
2108 
2109  int D = 0;
2110  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2111  return false;
2112 
2113  Delta = D;
2114  return true;
2115 }
2116 
2117 /// Check if we can change the instruction to use an offset value from the
2118 /// previous iteration. If so, return true and set the base and offset values
2119 /// so that we can rewrite the load, if necessary.
2120 /// v1 = Phi(v0, v3)
2121 /// v2 = load v1, 0
2122 /// v3 = post_store v1, 4, x
2123 /// This function enables the load to be rewritten as v2 = load v3, 4.
2124 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2125  unsigned &BasePos,
2126  unsigned &OffsetPos,
2127  unsigned &NewBase,
2128  int64_t &Offset) {
2129  // Get the load instruction.
2130  if (TII->isPostIncrement(*MI))
2131  return false;
2132  unsigned BasePosLd, OffsetPosLd;
2133  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2134  return false;
2135  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2136 
2137  // Look for the Phi instruction.
2138  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2139  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2140  if (!Phi || !Phi->isPHI())
2141  return false;
2142  // Get the register defined in the loop block.
2143  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2144  if (!PrevReg)
2145  return false;
2146 
2147  // Check for the post-increment load/store instruction.
2148  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2149  if (!PrevDef || PrevDef == MI)
2150  return false;
2151 
2152  if (!TII->isPostIncrement(*PrevDef))
2153  return false;
2154 
2155  unsigned BasePos1 = 0, OffsetPos1 = 0;
2156  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2157  return false;
2158 
2159  // Make sure that the instructions do not access the same memory location in
2160  // the next iteration.
2161  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2162  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2163  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2164  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2165  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2166  MF.deleteMachineInstr(NewMI);
2167  if (!Disjoint)
2168  return false;
2169 
2170  // Set the return value once we determine that we return true.
2171  BasePos = BasePosLd;
2172  OffsetPos = OffsetPosLd;
2173  NewBase = PrevReg;
2174  Offset = StoreOffset;
2175  return true;
2176 }
2177 
2178 /// Apply changes to the instruction if needed. The changes are need
2179 /// to improve the scheduling and depend up on the final schedule.
2181  SMSchedule &Schedule) {
2182  SUnit *SU = getSUnit(MI);
2184  InstrChanges.find(SU);
2185  if (It != InstrChanges.end()) {
2186  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2187  unsigned BasePos, OffsetPos;
2188  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2189  return;
2190  Register BaseReg = MI->getOperand(BasePos).getReg();
2191  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2192  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2193  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2194  int BaseStageNum = Schedule.stageScheduled(SU);
2195  int BaseCycleNum = Schedule.cycleScheduled(SU);
2196  if (BaseStageNum < DefStageNum) {
2197  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2198  int OffsetDiff = DefStageNum - BaseStageNum;
2199  if (DefCycleNum < BaseCycleNum) {
2200  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2201  if (OffsetDiff > 0)
2202  --OffsetDiff;
2203  }
2204  int64_t NewOffset =
2205  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2206  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2207  SU->setInstr(NewMI);
2208  MISUnitMap[NewMI] = SU;
2209  NewMIs[MI] = NewMI;
2210  }
2211  }
2212 }
2213 
2214 /// Return the instruction in the loop that defines the register.
2215 /// If the definition is a Phi, then follow the Phi operand to
2216 /// the instruction in the loop.
2217 MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2220  while (Def->isPHI()) {
2221  if (!Visited.insert(Def).second)
2222  break;
2223  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2224  if (Def->getOperand(i + 1).getMBB() == BB) {
2225  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2226  break;
2227  }
2228  }
2229  return Def;
2230 }
2231 
2232 /// Return true for an order or output dependence that is loop carried
2233 /// potentially. A dependence is loop carried if the destination defines a valu
2234 /// that may be used or defined by the source in a subsequent iteration.
2236  bool isSucc) {
2237  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2238  Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
2239  return false;
2240 
2241  if (!SwpPruneLoopCarried)
2242  return true;
2243 
2244  if (Dep.getKind() == SDep::Output)
2245  return true;
2246 
2247  MachineInstr *SI = Source->getInstr();
2248  MachineInstr *DI = Dep.getSUnit()->getInstr();
2249  if (!isSucc)
2250  std::swap(SI, DI);
2251  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2252 
2253  // Assume ordered loads and stores may have a loop carried dependence.
2254  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2255  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2256  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2257  return true;
2258 
2259  // Only chain dependences between a load and store can be loop carried.
2260  if (!DI->mayStore() || !SI->mayLoad())
2261  return false;
2262 
2263  unsigned DeltaS, DeltaD;
2264  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2265  return true;
2266 
2267  const MachineOperand *BaseOpS, *BaseOpD;
2268  int64_t OffsetS, OffsetD;
2269  bool OffsetSIsScalable, OffsetDIsScalable;
2271  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2272  TRI) ||
2273  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2274  TRI))
2275  return true;
2276 
2277  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2278  "Expected offsets to be byte offsets");
2279 
2280  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2281  return true;
2282 
2283  // Check that the base register is incremented by a constant value for each
2284  // iteration.
2285  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2286  if (!Def || !Def->isPHI())
2287  return true;
2288  unsigned InitVal = 0;
2289  unsigned LoopVal = 0;
2290  getPhiRegs(*Def, BB, InitVal, LoopVal);
2291  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2292  int D = 0;
2293  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2294  return true;
2295 
2296  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2297  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2298 
2299  // This is the main test, which checks the offset values and the loop
2300  // increment value to determine if the accesses may be loop carried.
2301  if (AccessSizeS == MemoryLocation::UnknownSize ||
2302  AccessSizeD == MemoryLocation::UnknownSize)
2303  return true;
2304 
2305  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2306  return true;
2307 
2308  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2309 }
2310 
2311 void SwingSchedulerDAG::postprocessDAG() {
2312  for (auto &M : Mutations)
2313  M->apply(this);
2314 }
2315 
2316 /// Try to schedule the node at the specified StartCycle and continue
2317 /// until the node is schedule or the EndCycle is reached. This function
2318 /// returns true if the node is scheduled. This routine may search either
2319 /// forward or backward for a place to insert the instruction based upon
2320 /// the relative values of StartCycle and EndCycle.
2321 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2322  bool forward = true;
2323  LLVM_DEBUG({
2324  dbgs() << "Trying to insert node between " << StartCycle << " and "
2325  << EndCycle << " II: " << II << "\n";
2326  });
2327  if (StartCycle > EndCycle)
2328  forward = false;
2329 
2330  // The terminating condition depends on the direction.
2331  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2332  for (int curCycle = StartCycle; curCycle != termCycle;
2333  forward ? ++curCycle : --curCycle) {
2334 
2335  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2336  ProcItinResources.canReserveResources(*SU, curCycle)) {
2337  LLVM_DEBUG({
2338  dbgs() << "\tinsert at cycle " << curCycle << " ";
2339  SU->getInstr()->dump();
2340  });
2341 
2342  if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
2343  ProcItinResources.reserveResources(*SU, curCycle);
2344  ScheduledInstrs[curCycle].push_back(SU);
2345  InstrToCycle.insert(std::make_pair(SU, curCycle));
2346  if (curCycle > LastCycle)
2347  LastCycle = curCycle;
2348  if (curCycle < FirstCycle)
2349  FirstCycle = curCycle;
2350  return true;
2351  }
2352  LLVM_DEBUG({
2353  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2354  SU->getInstr()->dump();
2355  });
2356  }
2357  return false;
2358 }
2359 
2360 // Return the cycle of the earliest scheduled instruction in the chain.
2362  SmallPtrSet<SUnit *, 8> Visited;
2363  SmallVector<SDep, 8> Worklist;
2364  Worklist.push_back(Dep);
2365  int EarlyCycle = INT_MAX;
2366  while (!Worklist.empty()) {
2367  const SDep &Cur = Worklist.pop_back_val();
2368  SUnit *PrevSU = Cur.getSUnit();
2369  if (Visited.count(PrevSU))
2370  continue;
2371  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2372  if (it == InstrToCycle.end())
2373  continue;
2374  EarlyCycle = std::min(EarlyCycle, it->second);
2375  for (const auto &PI : PrevSU->Preds)
2376  if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
2377  Worklist.push_back(PI);
2378  Visited.insert(PrevSU);
2379  }
2380  return EarlyCycle;
2381 }
2382 
2383 // Return the cycle of the latest scheduled instruction in the chain.
2385  SmallPtrSet<SUnit *, 8> Visited;
2386  SmallVector<SDep, 8> Worklist;
2387  Worklist.push_back(Dep);
2388  int LateCycle = INT_MIN;
2389  while (!Worklist.empty()) {
2390  const SDep &Cur = Worklist.pop_back_val();
2391  SUnit *SuccSU = Cur.getSUnit();
2392  if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
2393  continue;
2394  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2395  if (it == InstrToCycle.end())
2396  continue;
2397  LateCycle = std::max(LateCycle, it->second);
2398  for (const auto &SI : SuccSU->Succs)
2399  if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
2400  Worklist.push_back(SI);
2401  Visited.insert(SuccSU);
2402  }
2403  return LateCycle;
2404 }
2405 
2406 /// If an instruction has a use that spans multiple iterations, then
2407 /// return true. These instructions are characterized by having a back-ege
2408 /// to a Phi, which contains a reference to another Phi.
2410  for (auto &P : SU->Preds)
2411  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2412  for (auto &S : P.getSUnit()->Succs)
2413  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2414  return P.getSUnit();
2415  return nullptr;
2416 }
2417 
2418 /// Compute the scheduling start slot for the instruction. The start slot
2419 /// depends on any predecessor or successor nodes scheduled already.
2420 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2421  int *MinEnd, int *MaxStart, int II,
2422  SwingSchedulerDAG *DAG) {
2423  // Iterate over each instruction that has been scheduled already. The start
2424  // slot computation depends on whether the previously scheduled instruction
2425  // is a predecessor or successor of the specified instruction.
2426  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2427 
2428  // Iterate over each instruction in the current cycle.
2429  for (SUnit *I : getInstructions(cycle)) {
2430  // Because we're processing a DAG for the dependences, we recognize
2431  // the back-edge in recurrences by anti dependences.
2432  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2433  const SDep &Dep = SU->Preds[i];
2434  if (Dep.getSUnit() == I) {
2435  if (!DAG->isBackedge(SU, Dep)) {
2436  int EarlyStart = cycle + Dep.getLatency() -
2437  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2438  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2439  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2440  int End = earliestCycleInChain(Dep) + (II - 1);
2441  *MinEnd = std::min(*MinEnd, End);
2442  }
2443  } else {
2444  int LateStart = cycle - Dep.getLatency() +
2445  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2446  *MinLateStart = std::min(*MinLateStart, LateStart);
2447  }
2448  }
2449  // For instruction that requires multiple iterations, make sure that
2450  // the dependent instruction is not scheduled past the definition.
2451  SUnit *BE = multipleIterations(I, DAG);
2452  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2453  !SU->isPred(I))
2454  *MinLateStart = std::min(*MinLateStart, cycle);
2455  }
2456  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2457  if (SU->Succs[i].getSUnit() == I) {
2458  const SDep &Dep = SU->Succs[i];
2459  if (!DAG->isBackedge(SU, Dep)) {
2460  int LateStart = cycle - Dep.getLatency() +
2461  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2462  *MinLateStart = std::min(*MinLateStart, LateStart);
2463  if (DAG->isLoopCarriedDep(SU, Dep)) {
2464  int Start = latestCycleInChain(Dep) + 1 - II;
2465  *MaxStart = std::max(*MaxStart, Start);
2466  }
2467  } else {
2468  int EarlyStart = cycle + Dep.getLatency() -
2469  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2470  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2471  }
2472  }
2473  }
2474  }
2475  }
2476 }
2477 
2478 /// Order the instructions within a cycle so that the definitions occur
2479 /// before the uses. Returns true if the instruction is added to the start
2480 /// of the list, or false if added to the end.
2482  std::deque<SUnit *> &Insts) {
2483  MachineInstr *MI = SU->getInstr();
2484  bool OrderBeforeUse = false;
2485  bool OrderAfterDef = false;
2486  bool OrderBeforeDef = false;
2487  unsigned MoveDef = 0;
2488  unsigned MoveUse = 0;
2489  int StageInst1 = stageScheduled(SU);
2490 
2491  unsigned Pos = 0;
2492  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2493  ++I, ++Pos) {
2494  for (MachineOperand &MO : MI->operands()) {
2495  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2496  continue;
2497 
2498  Register Reg = MO.getReg();
2499  unsigned BasePos, OffsetPos;
2500  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2501  if (MI->getOperand(BasePos).getReg() == Reg)
2502  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2503  Reg = NewReg;
2504  bool Reads, Writes;
2505  std::tie(Reads, Writes) =
2506  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2507  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2508  OrderBeforeUse = true;
2509  if (MoveUse == 0)
2510  MoveUse = Pos;
2511  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2512  // Add the instruction after the scheduled instruction.
2513  OrderAfterDef = true;
2514  MoveDef = Pos;
2515  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2516  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2517  OrderBeforeUse = true;
2518  if (MoveUse == 0)
2519  MoveUse = Pos;
2520  } else {
2521  OrderAfterDef = true;
2522  MoveDef = Pos;
2523  }
2524  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2525  OrderBeforeUse = true;
2526  if (MoveUse == 0)
2527  MoveUse = Pos;
2528  if (MoveUse != 0) {
2529  OrderAfterDef = true;
2530  MoveDef = Pos - 1;
2531  }
2532  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2533  // Add the instruction before the scheduled instruction.
2534  OrderBeforeUse = true;
2535  if (MoveUse == 0)
2536  MoveUse = Pos;
2537  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2538  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2539  if (MoveUse == 0) {
2540  OrderBeforeDef = true;
2541  MoveUse = Pos;
2542  }
2543  }
2544  }
2545  // Check for order dependences between instructions. Make sure the source
2546  // is ordered before the destination.
2547  for (auto &S : SU->Succs) {
2548  if (S.getSUnit() != *I)
2549  continue;
2550  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2551  OrderBeforeUse = true;
2552  if (Pos < MoveUse)
2553  MoveUse = Pos;
2554  }
2555  // We did not handle HW dependences in previous for loop,
2556  // and we normally set Latency = 0 for Anti deps,
2557  // so may have nodes in same cycle with Anti denpendent on HW regs.
2558  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2559  OrderBeforeUse = true;
2560  if ((MoveUse == 0) || (Pos < MoveUse))
2561  MoveUse = Pos;
2562  }
2563  }
2564  for (auto &P : SU->Preds) {
2565  if (P.getSUnit() != *I)
2566  continue;
2567  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2568  OrderAfterDef = true;
2569  MoveDef = Pos;
2570  }
2571  }
2572  }
2573 
2574  // A circular dependence.
2575  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2576  OrderBeforeUse = false;
2577 
2578  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2579  // to a loop-carried dependence.
2580  if (OrderBeforeDef)
2581  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2582 
2583  // The uncommon case when the instruction order needs to be updated because
2584  // there is both a use and def.
2585  if (OrderBeforeUse && OrderAfterDef) {
2586  SUnit *UseSU = Insts.at(MoveUse);
2587  SUnit *DefSU = Insts.at(MoveDef);
2588  if (MoveUse > MoveDef) {
2589  Insts.erase(Insts.begin() + MoveUse);
2590  Insts.erase(Insts.begin() + MoveDef);
2591  } else {
2592  Insts.erase(Insts.begin() + MoveDef);
2593  Insts.erase(Insts.begin() + MoveUse);
2594  }
2595  orderDependence(SSD, UseSU, Insts);
2596  orderDependence(SSD, SU, Insts);
2597  orderDependence(SSD, DefSU, Insts);
2598  return;
2599  }
2600  // Put the new instruction first if there is a use in the list. Otherwise,
2601  // put it at the end of the list.
2602  if (OrderBeforeUse)
2603  Insts.push_front(SU);
2604  else
2605  Insts.push_back(SU);
2606 }
2607 
2608 /// Return true if the scheduled Phi has a loop carried operand.
2610  if (!Phi.isPHI())
2611  return false;
2612  assert(Phi.isPHI() && "Expecting a Phi.");
2613  SUnit *DefSU = SSD->getSUnit(&Phi);
2614  unsigned DefCycle = cycleScheduled(DefSU);
2615  int DefStage = stageScheduled(DefSU);
2616 
2617  unsigned InitVal = 0;
2618  unsigned LoopVal = 0;
2619  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2620  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2621  if (!UseSU)
2622  return true;
2623  if (UseSU->getInstr()->isPHI())
2624  return true;
2625  unsigned LoopCycle = cycleScheduled(UseSU);
2626  int LoopStage = stageScheduled(UseSU);
2627  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2628 }
2629 
2630 /// Return true if the instruction is a definition that is loop carried
2631 /// and defines the use on the next iteration.
2632 /// v1 = phi(v2, v3)
2633 /// (Def) v3 = op v1
2634 /// (MO) = v1
2635 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2636 /// register.
2639  if (!MO.isReg())
2640  return false;
2641  if (Def->isPHI())
2642  return false;
2643  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2644  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2645  return false;
2646  if (!isLoopCarried(SSD, *Phi))
2647  return false;
2648  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2649  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2650  MachineOperand &DMO = Def->getOperand(i);
2651  if (!DMO.isReg() || !DMO.isDef())
2652  continue;
2653  if (DMO.getReg() == LoopReg)
2654  return true;
2655  }
2656  return false;
2657 }
2658 
2659 /// Determine transitive dependences of unpipelineable instructions
2662  SmallSet<SUnit *, 8> DoNotPipeline;
2663  SmallVector<SUnit *, 8> Worklist;
2664 
2665  for (auto &SU : SSD->SUnits)
2666  if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
2667  Worklist.push_back(&SU);
2668 
2669  while (!Worklist.empty()) {
2670  auto SU = Worklist.pop_back_val();
2671  if (DoNotPipeline.count(SU))
2672  continue;
2673  LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
2674  DoNotPipeline.insert(SU);
2675  for (auto &Dep : SU->Preds)
2676  Worklist.push_back(Dep.getSUnit());
2677  if (SU->getInstr()->isPHI())
2678  for (auto &Dep : SU->Succs)
2679  if (Dep.getKind() == SDep::Anti)
2680  Worklist.push_back(Dep.getSUnit());
2681  }
2682  return DoNotPipeline;
2683 }
2684 
2685 // Determine all instructions upon which any unpipelineable instruction depends
2686 // and ensure that they are in stage 0. If unable to do so, return false.
2690 
2691  int NewLastCycle = INT_MIN;
2692  for (SUnit &SU : SSD->SUnits) {
2693  if (!SU.isInstr())
2694  continue;
2695  if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
2696  NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
2697  continue;
2698  }
2699 
2700  // Put the non-pipelined instruction as early as possible in the schedule
2701  int NewCycle = getFirstCycle();
2702  for (auto &Dep : SU.Preds)
2703  NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
2704 
2705  int OldCycle = InstrToCycle[&SU];
2706  if (OldCycle != NewCycle) {
2707  InstrToCycle[&SU] = NewCycle;
2708  auto &OldS = getInstructions(OldCycle);
2709  llvm::erase_value(OldS, &SU);
2710  getInstructions(NewCycle).emplace_back(&SU);
2711  LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
2712  << ") is not pipelined; moving from cycle " << OldCycle
2713  << " to " << NewCycle << " Instr:" << *SU.getInstr());
2714  }
2715  NewLastCycle = std::max(NewLastCycle, NewCycle);
2716  }
2717  LastCycle = NewLastCycle;
2718  return true;
2719 }
2720 
2721 // Check if the generated schedule is valid. This function checks if
2722 // an instruction that uses a physical register is scheduled in a
2723 // different stage than the definition. The pipeliner does not handle
2724 // physical register values that may cross a basic block boundary.
2725 // Furthermore, if a physical def/use pair is assigned to the same
2726 // cycle, orderDependence does not guarantee def/use ordering, so that
2727 // case should be considered invalid. (The test checks for both
2728 // earlier and same-cycle use to be more robust.)
2730  for (SUnit &SU : SSD->SUnits) {
2731  if (!SU.hasPhysRegDefs)
2732  continue;
2733  int StageDef = stageScheduled(&SU);
2734  int CycleDef = InstrToCycle[&SU];
2735  assert(StageDef != -1 && "Instruction should have been scheduled.");
2736  for (auto &SI : SU.Succs)
2737  if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
2738  if (Register::isPhysicalRegister(SI.getReg())) {
2739  if (stageScheduled(SI.getSUnit()) != StageDef)
2740  return false;
2741  if (InstrToCycle[SI.getSUnit()] <= CycleDef)
2742  return false;
2743  }
2744  }
2745  return true;
2746 }
2747 
2748 /// A property of the node order in swing-modulo-scheduling is
2749 /// that for nodes outside circuits the following holds:
2750 /// none of them is scheduled after both a successor and a
2751 /// predecessor.
2752 /// The method below checks whether the property is met.
2753 /// If not, debug information is printed and statistics information updated.
2754 /// Note that we do not use an assert statement.
2755 /// The reason is that although an invalid node oder may prevent
2756 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2757 /// it does not lead to the generation of incorrect code.
2758 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2759 
2760  // a sorted vector that maps each SUnit to its index in the NodeOrder
2761  typedef std::pair<SUnit *, unsigned> UnitIndex;
2762  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2763 
2764  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2765  Indices.push_back(std::make_pair(NodeOrder[i], i));
2766 
2767  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2768  return std::get<0>(i1) < std::get<0>(i2);
2769  };
2770 
2771  // sort, so that we can perform a binary search
2772  llvm::sort(Indices, CompareKey);
2773 
2774  bool Valid = true;
2775  (void)Valid;
2776  // for each SUnit in the NodeOrder, check whether
2777  // it appears after both a successor and a predecessor
2778  // of the SUnit. If this is the case, and the SUnit
2779  // is not part of circuit, then the NodeOrder is not
2780  // valid.
2781  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2782  SUnit *SU = NodeOrder[i];
2783  unsigned Index = i;
2784 
2785  bool PredBefore = false;
2786  bool SuccBefore = false;
2787 
2788  SUnit *Succ;
2789  SUnit *Pred;
2790  (void)Succ;
2791  (void)Pred;
2792 
2793  for (SDep &PredEdge : SU->Preds) {
2794  SUnit *PredSU = PredEdge.getSUnit();
2795  unsigned PredIndex = std::get<1>(
2796  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2797  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2798  PredBefore = true;
2799  Pred = PredSU;
2800  break;
2801  }
2802  }
2803 
2804  for (SDep &SuccEdge : SU->Succs) {
2805  SUnit *SuccSU = SuccEdge.getSUnit();
2806  // Do not process a boundary node, it was not included in NodeOrder,
2807  // hence not in Indices either, call to std::lower_bound() below will
2808  // return Indices.end().
2809  if (SuccSU->isBoundaryNode())
2810  continue;
2811  unsigned SuccIndex = std::get<1>(
2812  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2813  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2814  SuccBefore = true;
2815  Succ = SuccSU;
2816  break;
2817  }
2818  }
2819 
2820  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2821  // instructions in circuits are allowed to be scheduled
2822  // after both a successor and predecessor.
2823  bool InCircuit = llvm::any_of(
2824  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2825  if (InCircuit)
2826  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2827  else {
2828  Valid = false;
2829  NumNodeOrderIssues++;
2830  LLVM_DEBUG(dbgs() << "Predecessor ";);
2831  }
2832  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2833  << " are scheduled before node " << SU->NodeNum
2834  << "\n";);
2835  }
2836  }
2837 
2838  LLVM_DEBUG({
2839  if (!Valid)
2840  dbgs() << "Invalid node order found!\n";
2841  });
2842 }
2843 
2844 /// Attempt to fix the degenerate cases when the instruction serialization
2845 /// causes the register lifetimes to overlap. For example,
2846 /// p' = store_pi(p, b)
2847 /// = load p, offset
2848 /// In this case p and p' overlap, which means that two registers are needed.
2849 /// Instead, this function changes the load to use p' and updates the offset.
2850 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2851  unsigned OverlapReg = 0;
2852  unsigned NewBaseReg = 0;
2853  for (SUnit *SU : Instrs) {
2854  MachineInstr *MI = SU->getInstr();
2855  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2856  const MachineOperand &MO = MI->getOperand(i);
2857  // Look for an instruction that uses p. The instruction occurs in the
2858  // same cycle but occurs later in the serialized order.
2859  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2860  // Check that the instruction appears in the InstrChanges structure,
2861  // which contains instructions that can have the offset updated.
2863  InstrChanges.find(SU);
2864  if (It != InstrChanges.end()) {
2865  unsigned BasePos, OffsetPos;
2866  // Update the base register and adjust the offset.
2867  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2868  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2869  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2870  int64_t NewOffset =
2871  MI->getOperand(OffsetPos).getImm() - It->second.second;
2872  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2873  SU->setInstr(NewMI);
2874  MISUnitMap[NewMI] = SU;
2875  NewMIs[MI] = NewMI;
2876  }
2877  }
2878  OverlapReg = 0;
2879  NewBaseReg = 0;
2880  break;
2881  }
2882  // Look for an instruction of the form p' = op(p), which uses and defines
2883  // two virtual registers that get allocated to the same physical register.
2884  unsigned TiedUseIdx = 0;
2885  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2886  // OverlapReg is p in the example above.
2887  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2888  // NewBaseReg is p' in the example above.
2889  NewBaseReg = MI->getOperand(i).getReg();
2890  break;
2891  }
2892  }
2893  }
2894 }
2895 
2896 /// After the schedule has been formed, call this function to combine
2897 /// the instructions from the different stages/cycles. That is, this
2898 /// function creates a schedule that represents a single iteration.
2900  // Move all instructions to the first stage from later stages.
2901  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2902  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2903  ++stage) {
2904  std::deque<SUnit *> &cycleInstrs =
2905  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2906  for (SUnit *SU : llvm::reverse(cycleInstrs))
2907  ScheduledInstrs[cycle].push_front(SU);
2908  }
2909  }
2910 
2911  // Erase all the elements in the later stages. Only one iteration should
2912  // remain in the scheduled list, and it contains all the instructions.
2913  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2914  ScheduledInstrs.erase(cycle);
2915 
2916  // Change the registers in instruction as specified in the InstrChanges
2917  // map. We need to use the new registers to create the correct order.
2918  for (const SUnit &SU : SSD->SUnits)
2919  SSD->applyInstrChange(SU.getInstr(), *this);
2920 
2921  // Reorder the instructions in each cycle to fix and improve the
2922  // generated code.
2923  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2924  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2925  std::deque<SUnit *> newOrderPhi;
2926  for (SUnit *SU : cycleInstrs) {
2927  if (SU->getInstr()->isPHI())
2928  newOrderPhi.push_back(SU);
2929  }
2930  std::deque<SUnit *> newOrderI;
2931  for (SUnit *SU : cycleInstrs) {
2932  if (!SU->getInstr()->isPHI())
2933  orderDependence(SSD, SU, newOrderI);
2934  }
2935  // Replace the old order with the new order.
2936  cycleInstrs.swap(newOrderPhi);
2937  llvm::append_range(cycleInstrs, newOrderI);
2938  SSD->fixupRegisterOverlaps(cycleInstrs);
2939  }
2940 
2941  LLVM_DEBUG(dump(););
2942 }
2943 
2944 void NodeSet::print(raw_ostream &os) const {
2945  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2946  << " depth " << MaxDepth << " col " << Colocate << "\n";
2947  for (const auto &I : Nodes)
2948  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2949  os << "\n";
2950 }
2951 
2952 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2953 /// Print the schedule information to the given output.
2955  // Iterate over each cycle.
2956  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2957  // Iterate over each instruction in the cycle.
2958  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2959  for (SUnit *CI : cycleInstrs->second) {
2960  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2961  os << "(" << CI->NodeNum << ") ";
2962  CI->getInstr()->print(os);
2963  os << "\n";
2964  }
2965  }
2966 }
2967 
2968 /// Utility function used for debugging to print the schedule.
2971 
2972 void ResourceManager::dumpMRT() const {
2973  LLVM_DEBUG({
2974  if (UseDFA)
2975  return;
2976  std::stringstream SS;
2977  SS << "MRT:\n";
2978  SS << std::setw(4) << "Slot";
2979  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
2980  SS << std::setw(3) << I;
2981  SS << std::setw(7) << "#Mops"
2982  << "\n";
2983  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
2984  SS << std::setw(4) << Slot;
2985  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I)
2986  SS << std::setw(3) << MRT[Slot][I];
2987  SS << std::setw(7) << NumScheduledMops[Slot] << "\n";
2988  }
2989  dbgs() << SS.str();
2990  });
2991 }
2992 #endif
2993 
2995  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2996  unsigned ProcResourceID = 0;
2997 
2998  // We currently limit the resource kinds to 64 and below so that we can use
2999  // uint64_t for Masks
3000  assert(SM.getNumProcResourceKinds() < 64 &&
3001  "Too many kinds of resources, unsupported");
3002  // Create a unique bitmask for every processor resource unit.
3003  // Skip resource at index 0, since it always references 'InvalidUnit'.
3004  Masks.resize(SM.getNumProcResourceKinds());
3005  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3006  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3007  if (Desc.SubUnitsIdxBegin)
3008  continue;
3009  Masks[I] = 1ULL << ProcResourceID;
3010  ProcResourceID++;
3011  }
3012  // Create a unique bitmask for every processor resource group.
3013  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3014  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3015  if (!Desc.SubUnitsIdxBegin)
3016  continue;
3017  Masks[I] = 1ULL << ProcResourceID;
3018  for (unsigned U = 0; U < Desc.NumUnits; ++U)
3019  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3020  ProcResourceID++;
3021  }
3022  LLVM_DEBUG({
3023  if (SwpShowResMask) {
3024  dbgs() << "ProcResourceDesc:\n";
3025  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3026  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3027  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3028  ProcResource->Name, I, Masks[I],
3029  ProcResource->NumUnits);
3030  }
3031  dbgs() << " -----------------\n";
3032  }
3033  });
3034 }
3035 
3037  LLVM_DEBUG({
3038  if (SwpDebugResource)
3039  dbgs() << "canReserveResources:\n";
3040  });
3041  if (UseDFA)
3042  return DFAResources[positiveModulo(Cycle, InitiationInterval)]
3043  ->canReserveResources(&SU.getInstr()->getDesc());
3044 
3045  const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3046  if (!SCDesc->isValid()) {
3047  LLVM_DEBUG({
3048  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3049  dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3050  });
3051  return true;
3052  }
3053 
3054  reserveResources(SCDesc, Cycle);
3055  bool Result = !isOverbooked();
3056  unreserveResources(SCDesc, Cycle);
3057 
3058  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return " << Result << "\n\n";);
3059  return Result;
3060 }
3061 
3062 void ResourceManager::reserveResources(SUnit &SU, int Cycle) {
3063  LLVM_DEBUG({
3064  if (SwpDebugResource)
3065  dbgs() << "reserveResources:\n";
3066  });
3067  if (UseDFA)
3068  return DFAResources[positiveModulo(Cycle, InitiationInterval)]
3069  ->reserveResources(&SU.getInstr()->getDesc());
3070 
3071  const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3072  if (!SCDesc->isValid()) {
3073  LLVM_DEBUG({
3074  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3075  dbgs() << "isPseudo:" << SU.getInstr()->isPseudo() << "\n";
3076  });
3077  return;
3078  }
3079 
3080  reserveResources(SCDesc, Cycle);
3081 
3082  LLVM_DEBUG({
3083  if (SwpDebugResource) {
3084  dumpMRT();
3085  dbgs() << "reserveResources: done!\n\n";
3086  }
3087  });
3088 }
3089 
3090 void ResourceManager::reserveResources(const MCSchedClassDesc *SCDesc,
3091  int Cycle) {
3092  assert(!UseDFA);
3093  for (const MCWriteProcResEntry &PRE : make_range(
3094  STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
3095  for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
3096  ++MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
3097 
3098  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3099  ++NumScheduledMops[positiveModulo(C, InitiationInterval)];
3100 }
3101 
3102 void ResourceManager::unreserveResources(const MCSchedClassDesc *SCDesc,
3103  int Cycle) {
3104  assert(!UseDFA);
3105  for (const MCWriteProcResEntry &PRE : make_range(
3106  STI->getWriteProcResBegin(SCDesc), STI->getWriteProcResEnd(SCDesc)))
3107  for (int C = Cycle; C < Cycle + PRE.Cycles; ++C)
3108  --MRT[positiveModulo(C, InitiationInterval)][PRE.ProcResourceIdx];
3109 
3110  for (int C = Cycle; C < Cycle + SCDesc->NumMicroOps; ++C)
3111  --NumScheduledMops[positiveModulo(C, InitiationInterval)];
3112 }
3113 
3114 bool ResourceManager::isOverbooked() const {
3115  assert(!UseDFA);
3116  for (int Slot = 0; Slot < InitiationInterval; ++Slot) {
3117  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3118  const MCProcResourceDesc *Desc = SM.getProcResource(I);
3119  if (MRT[Slot][I] > Desc->NumUnits)
3120  return true;
3121  }
3122  if (NumScheduledMops[Slot] > IssueWidth)
3123  return true;
3124  }
3125  return false;
3126 }
3127 
3128 int ResourceManager::calculateResMIIDFA() const {
3129  assert(UseDFA);
3130 
3131  // Sort the instructions by the number of available choices for scheduling,
3132  // least to most. Use the number of critical resources as the tie breaker.
3133  FuncUnitSorter FUS = FuncUnitSorter(*ST);
3134  for (SUnit &SU : DAG->SUnits)
3135  FUS.calcCriticalResources(*SU.getInstr());
3137  FuncUnitOrder(FUS);
3138 
3139  for (SUnit &SU : DAG->SUnits)
3140  FuncUnitOrder.push(SU.getInstr());
3141 
3143  Resources.push_back(
3144  std::unique_ptr<DFAPacketizer>(TII->CreateTargetScheduleState(*ST)));
3145 
3146  while (!FuncUnitOrder.empty()) {
3147  MachineInstr *MI = FuncUnitOrder.top();
3148  FuncUnitOrder.pop();
3149  if (TII->isZeroCost(MI->getOpcode()))
3150  continue;
3151 
3152  // Attempt to reserve the instruction in an existing DFA. At least one
3153  // DFA is needed for each cycle.
3154  unsigned NumCycles = DAG->getSUnit(MI)->Latency;
3155  unsigned ReservedCycles = 0;
3156  auto *RI = Resources.begin();
3157  auto *RE = Resources.end();
3158  LLVM_DEBUG({
3159  dbgs() << "Trying to reserve resource for " << NumCycles
3160  << " cycles for \n";
3161  MI->dump();
3162  });
3163  for (unsigned C = 0; C < NumCycles; ++C)
3164  while (RI != RE) {
3165  if ((*RI)->canReserveResources(*MI)) {
3166  (*RI)->reserveResources(*MI);
3167  ++ReservedCycles;
3168  break;
3169  }
3170  RI++;
3171  }
3172  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
3173  << ", NumCycles:" << NumCycles << "\n");
3174  // Add new DFAs, if needed, to reserve resources.
3175  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
3177  << "NewResource created to reserve resources"
3178  << "\n");
3179  auto *NewResource = TII->CreateTargetScheduleState(*ST);
3180  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
3181  NewResource->reserveResources(*MI);
3182  Resources.push_back(std::unique_ptr<DFAPacketizer>(NewResource));
3183  }
3184  }
3185 
3186  int Resmii = Resources.size();
3187  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
3188  return Resmii;
3189 }
3190 
3192  if (UseDFA)
3193  return calculateResMIIDFA();
3194 
3195  // Count each resource consumption and divide it by the number of units.
3196  // ResMII is the max value among them.
3197 
3198  int NumMops = 0;
3199  SmallVector<uint64_t> ResourceCount(SM.getNumProcResourceKinds());
3200  for (SUnit &SU : DAG->SUnits) {
3201  if (TII->isZeroCost(SU.getInstr()->getOpcode()))
3202  continue;
3203 
3204  const MCSchedClassDesc *SCDesc = DAG->getSchedClass(&SU);
3205  if (!SCDesc->isValid())
3206  continue;
3207 
3208  LLVM_DEBUG({
3209  if (SwpDebugResource) {
3210  DAG->dumpNode(SU);
3211  dbgs() << " #Mops: " << SCDesc->NumMicroOps << "\n"
3212  << " WriteProcRes: ";
3213  }
3214  });
3215  NumMops += SCDesc->NumMicroOps;
3216  for (const MCWriteProcResEntry &PRE :
3217  make_range(STI->getWriteProcResBegin(SCDesc),
3218  STI->getWriteProcResEnd(SCDesc))) {
3219  LLVM_DEBUG({
3220  if (SwpDebugResource) {
3221  const MCProcResourceDesc *Desc =
3222  SM.getProcResource(PRE.ProcResourceIdx);
3223  dbgs() << Desc->Name << ": " << PRE.Cycles << ", ";
3224  }
3225  });
3226  ResourceCount[PRE.ProcResourceIdx] += PRE.Cycles;
3227  }
3228  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "\n");
3229  }
3230 
3231  int Result = (NumMops + IssueWidth - 1) / IssueWidth;
3232  LLVM_DEBUG({
3233  if (SwpDebugResource)
3234  dbgs() << "#Mops: " << NumMops << ", "
3235  << "IssueWidth: " << IssueWidth << ", "
3236  << "Cycles: " << Result << "\n";
3237  });
3238 
3239  LLVM_DEBUG({
3240  if (SwpDebugResource) {
3241  std::stringstream SS;
3242  SS << std::setw(2) << "ID" << std::setw(16) << "Name" << std::setw(10)
3243  << "Units" << std::setw(10) << "Consumed" << std::setw(10) << "Cycles"
3244  << "\n";
3245  dbgs() << SS.str();
3246  }
3247  });
3248  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3249  const MCProcResourceDesc *Desc = SM.getProcResource(I);
3250  int Cycles = (ResourceCount[I] + Desc->NumUnits - 1) / Desc->NumUnits;
3251  LLVM_DEBUG({
3252  if (SwpDebugResource) {
3253  std::stringstream SS;
3254  SS << std::setw(2) << I << std::setw(16) << Desc->Name << std::setw(10)
3255  << Desc->NumUnits << std::setw(10) << ResourceCount[I]
3256  << std::setw(10) << Cycles << "\n";
3257  dbgs() << SS.str();
3258  }
3259  });
3260  if (Cycles > Result)
3261  Result = Cycles;
3262  }
3263  return Result;
3264 }
3265 
3267  InitiationInterval = II;
3268  DFAResources.clear();
3269  DFAResources.resize(II);
3270  for (auto &I : DFAResources)
3271  I.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST));
3272  MRT.clear();
3274  NumScheduledMops.clear();
3275  NumScheduledMops.resize(II);
3276 }
i
i
Definition: README.txt:29
llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:113
computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1552
llvm::cl::Option::getPosition
unsigned getPosition() const
Definition: CommandLine.h:305
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:171
isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:729
isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:1765
llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
Definition: ScheduleDAG.cpp:566
llvm::TargetInstrInfo::PipelinerLoopInfo::shouldUseSchedule
virtual bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS)
Return true if the proposed schedule should used.
Definition: TargetInstrInfo.h:737
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1070
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:108
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:105
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
Definition: SetOperations.h:72
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:84
llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:239
llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:671
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:209
llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:2944
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:184
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:2970
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:79
llvm::SwpForceIssueWidth
cl::opt< int > SwpForceIssueWidth
A command line argument to force pipeliner to use specified issue width.
SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MCInstrDesc.h
SetOperations.h
llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:67
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:163
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:720
llvm::ResourceManager::init
void init(int II)
Initialize resources with the initiation interval II.
Definition: MachinePipeliner.cpp:3266
llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:731
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1727
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:198
llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:381
llvm::MapVector::clear
void clear()
Definition: MapVector.h:89
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:723
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:618
CycleAnalysis.h
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:226
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: MachinePipeliner.cpp:698
llvm::SMSchedule::normalizeNonPipelinedInstructions
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Definition: MachinePipeliner.cpp:2687
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:627
MapVector.h
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:492
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:277
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:67
MachineBasicBlock.h
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1387
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:237
EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
llvm::DenseMapIterator
Definition: DenseMap.h:57
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:357
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1395
DenseMap.h
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::RegisterMaskPair
Definition: RegisterPressure.h:38
llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2235
llvm::TargetInstrInfo::CreateTargetScheduleState
virtual DFAPacketizer * CreateTargetScheduleState(const TargetSubtargetInfo &) const
Create machine specific model for scheduling.
Definition: TargetInstrInfo.h:1788
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2409
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1381
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1336
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:243
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:369
llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition: MachinePipeliner.cpp:2321
llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:282
llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:85
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67
computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1523
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:257
SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:140
EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:201
llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:141
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:127
RegisterPressure.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1363
MachineRegisterInfo.h
llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:91
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1605
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:57
llvm::TargetInstrInfo::PipelinerLoopInfo
Object returned by analyzeLoopForPipelining.
Definition: TargetInstrInfo.h:726
CommandLine.h
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::X86AS::SS
@ SS
Definition: X86.h:201
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
MCInstrItineraries.h
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:37
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1287
MachineLoopInfo.h
llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:2969
llvm::AAResults
Definition: AliasAnalysis.h:518
llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:260
llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:250
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:237
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:210
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::MCSchedClassDesc::NumMicroOps
uint16_t NumMicroOps
Definition: MCSchedule.h:116
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:480
llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:227
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:585
DFAPacketizer.h
llvm::MachineFunction::deleteMachineInstr
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:418
llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:582
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:391
false
Definition: StackSlotColoring.cpp:141
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
SDAG
QP Compare Ordered outs ins xscmpudp No SDAG
Definition: README_P9.txt:301
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:187
llvm::MachineInstr::isPseudo
bool isPseudo(QueryType Type=IgnoreBundle) const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MachineInstr.h:852
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:548
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:572
llvm::Instruction
Definition: Instruction.h:42
llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:2729
SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:109
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:625
llvm::SMSchedule::orderDependence
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2481
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1708
BitVector.h
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:319
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:563
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:239
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:75
llvm::pdb::PDB_ColorItem::Path
@ Path
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:321
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:375
llvm::erase_value
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:1810
llvm::SMSchedule::isLoopCarried
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:2609
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:271
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:99
llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:65
llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:540
llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:774
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1367
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1538
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1292
llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:92
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2420
llvm::cl::opt< bool >
llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431
llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:566
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:502
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:165
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4532
llvm::MachinePipeliner::LoopInfo::LoopPipelinerInfo
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
Definition: MachinePipeliner.h:88
llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:31
llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:462
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:416
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:48
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2217
uint64_t
LiveIntervals.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:578
llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:83
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:383
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:485
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:383
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112
llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Definition: MachinePipeliner.h:223
llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:367
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
MCRegisterInfo.h
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:207
ArrayRef.h
SwpForceII
static cl::opt< int > SwpForceII("pipeliner-force-ii", cl::desc("Force pipeliner to use specified II."), cl::Hidden, cl::init(-1))
A command line argument to force pipeliner to use specified initial interval.
SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:948
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:87
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:682
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:275
llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:86
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:2637
isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:706
llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:373
iterator_range.h
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1300
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
bool empty() const
Definition: DenseMap.h:98
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:114
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:291
llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:632
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1518
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1571
llvm::AArch64::RM
@ RM
Definition: AArch64ISelLowering.h:478
ScheduleDAGMutation.h
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:234
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:167
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:378
SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:73
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1597
llvm::ResourceManager::calculateResMII
int calculateResMII() const
Definition: MachinePipeliner.cpp:3191
llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:93
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:2018
llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1818
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1555
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:32
llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1166
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:135
llvm::ScheduleDAGInstrs::getSchedClass
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and cache a resolved scheduling class for an SUnit.
Definition: ScheduleDAGInstrs.h:266
llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:363
llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:2899
SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:159
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:364
SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:58
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:238
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1328
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1752
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:954
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1437
llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:2994
llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:230
swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1149
SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:397
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1069
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:636
llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:72
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1437
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:557
llvm::SMSchedule::computeUnpipelineableNodes
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
Definition: MachinePipeliner.cpp:2660
llvm::pdb::PDB_LocType::Slot
@ Slot
llvm::MachineLoop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: MachineLoopInfo.cpp:101
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:438
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1804
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:53
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:597
pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1467
llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:2954
llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:178
llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2180
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
AA
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:146
llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:74
llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2361
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:680
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:78
llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1158
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
llvm::rdf::NodeSet
std::set< NodeId > NodeSet
Definition: RDFGraph.h:514
MachineInstrBuilder.h
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:178
succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1496
llvm::SUnit::isInstr
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:362
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:270
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:564
N
#define N
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:106
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1308
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:519
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::ResourceManager::canReserveResources
bool canReserveResources(SUnit &SU, int Cycle)
Check if the resources occupied by a machine instruction are available in the current state.
Definition: MachinePipeliner.cpp:3036
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:660
llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:83
llvm::TargetInstrInfo::PipelinerLoopInfo::shouldIgnoreForPipelining
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
LaneBitmask.h
llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:371
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
MachineMemOperand.h
llvm::SmallVectorImpl< const Value * >
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
MachineOperand.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:355
pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:75
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:160
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MachinePipeliner.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:135
llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:627
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
llvm::cl::desc
Definition: CommandLine.h:412
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:120
raw_ostream.h
llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2384
llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:620
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineFunction.h
llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1089
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:2850
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76
llvm::ResourceManager
Definition: MachinePipeliner.h:444
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:488
llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:220
TargetRegisterInfo.h
Debug.h
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:307
llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:611
llvm::SmallSet::contains
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:235
SetVector.h
llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:532
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:287
MachineDominators.h
llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:274
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365