LLVM  15.0.0git
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
33 #include "llvm/ADT/ArrayRef.h"
34 #include "llvm/ADT/BitVector.h"
35 #include "llvm/ADT/DenseMap.h"
36 #include "llvm/ADT/MapVector.h"
37 #include "llvm/ADT/PriorityQueue.h"
38 #include "llvm/ADT/SetOperations.h"
39 #include "llvm/ADT/SetVector.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/SmallSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/Statistic.h"
68 #include "llvm/Config/llvm-config.h"
69 #include "llvm/IR/Attributes.h"
70 #include "llvm/IR/Function.h"
71 #include "llvm/MC/LaneBitmask.h"
72 #include "llvm/MC/MCInstrDesc.h"
74 #include "llvm/MC/MCRegisterInfo.h"
75 #include "llvm/Pass.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <climits>
84 #include <cstdint>
85 #include <deque>
86 #include <functional>
87 #include <iterator>
88 #include <map>
89 #include <memory>
90 #include <tuple>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 
96 #define DEBUG_TYPE "pipeliner"
97 
98 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
99 STATISTIC(NumPipelined, "Number of loops software pipelined");
100 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
101 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
102 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
103 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
104 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
105 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
106 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
107 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
108 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
109 
110 /// A command line option to turn software pipelining on or off.
111 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
112  cl::desc("Enable Software Pipelining"));
113 
114 /// A command line option to enable SWP at -Os.
115 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
116  cl::desc("Enable SWP at Os."), cl::Hidden,
117  cl::init(false));
118 
119 /// A command line argument to limit minimum initial interval for pipelining.
120 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
121  cl::desc("Size limit for the MII."),
122  cl::Hidden, cl::init(27));
123 
124 /// A command line argument to limit the number of stages in the pipeline.
125 static cl::opt<int>
126  SwpMaxStages("pipeliner-max-stages",
127  cl::desc("Maximum stages allowed in the generated scheduled."),
128  cl::Hidden, cl::init(3));
129 
130 /// A command line option to disable the pruning of chain dependences due to
131 /// an unrelated Phi.
132 static cl::opt<bool>
133  SwpPruneDeps("pipeliner-prune-deps",
134  cl::desc("Prune dependences between unrelated Phi nodes."),
135  cl::Hidden, cl::init(true));
136 
137 /// A command line option to disable the pruning of loop carried order
138 /// dependences.
139 static cl::opt<bool>
140  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
141  cl::desc("Prune loop carried order dependences."),
142  cl::Hidden, cl::init(true));
143 
144 #ifndef NDEBUG
145 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
146 #endif
147 
148 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
150  cl::desc("Ignore RecMII"));
151 
152 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
153  cl::init(false));
154 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
155  cl::init(false));
156 
158  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
159  cl::desc("Instead of emitting the pipelined code, annotate instructions "
160  "with the generated schedule for feeding into the "
161  "-modulo-schedule-test pass"));
162 
164  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
165  cl::desc(
166  "Use the experimental peeling code generator for software pipelining"));
167 
168 namespace llvm {
169 
170 // A command line option to enable the CopyToPhi DAG mutation.
171 cl::opt<bool> SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
172  cl::init(true),
173  cl::desc("Enable CopyToPhi DAG Mutation"));
174 
175 } // end namespace llvm
176 
177 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
178 char MachinePipeliner::ID = 0;
179 #ifndef NDEBUG
181 #endif
183 
185  "Modulo Software Pipelining", false, false)
191  "Modulo Software Pipelining", false, false)
192 
193 /// The "main" function for implementing Swing Modulo Scheduling.
194 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
195  if (skipFunction(mf.getFunction()))
196  return false;
197 
198  if (!EnableSWP)
199  return false;
200 
201  if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
203  return false;
204 
205  if (!mf.getSubtarget().enableMachinePipeliner())
206  return false;
207 
208  // Cannot pipeline loops without instruction itineraries if we are using
209  // DFA for the pipeliner.
210  if (mf.getSubtarget().useDFAforSMS() &&
211  (!mf.getSubtarget().getInstrItineraryData() ||
212  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
213  return false;
214 
215  MF = &mf;
216  MLI = &getAnalysis<MachineLoopInfo>();
217  MDT = &getAnalysis<MachineDominatorTree>();
218  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
219  TII = MF->getSubtarget().getInstrInfo();
220  RegClassInfo.runOnMachineFunction(*MF);
221 
222  for (auto &L : *MLI)
223  scheduleLoop(*L);
224 
225  return false;
226 }
227 
228 /// Attempt to perform the SMS algorithm on the specified loop. This function is
229 /// the main entry point for the algorithm. The function identifies candidate
230 /// loops, calculates the minimum initiation interval, and attempts to schedule
231 /// the loop.
232 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
233  bool Changed = false;
234  for (auto &InnerLoop : L)
235  Changed |= scheduleLoop(*InnerLoop);
236 
237 #ifndef NDEBUG
238  // Stop trying after reaching the limit (if any).
239  int Limit = SwpLoopLimit;
240  if (Limit >= 0) {
241  if (NumTries >= SwpLoopLimit)
242  return Changed;
243  NumTries++;
244  }
245 #endif
246 
247  setPragmaPipelineOptions(L);
248  if (!canPipelineLoop(L)) {
249  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
250  ORE->emit([&]() {
251  return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
252  L.getStartLoc(), L.getHeader())
253  << "Failed to pipeline loop";
254  });
255 
256  LI.LoopPipelinerInfo.reset();
257  return Changed;
258  }
259 
260  ++NumTrytoPipeline;
261 
262  Changed = swingModuloScheduler(L);
263 
264  LI.LoopPipelinerInfo.reset();
265  return Changed;
266 }
267 
268 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
269  // Reset the pragma for the next loop in iteration.
270  disabledByPragma = false;
271  II_setByPragma = 0;
272 
273  MachineBasicBlock *LBLK = L.getTopBlock();
274 
275  if (LBLK == nullptr)
276  return;
277 
278  const BasicBlock *BBLK = LBLK->getBasicBlock();
279  if (BBLK == nullptr)
280  return;
281 
282  const Instruction *TI = BBLK->getTerminator();
283  if (TI == nullptr)
284  return;
285 
286  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
287  if (LoopID == nullptr)
288  return;
289 
290  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
291  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
292 
293  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
294  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
295 
296  if (MD == nullptr)
297  continue;
298 
299  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
300 
301  if (S == nullptr)
302  continue;
303 
304  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
305  assert(MD->getNumOperands() == 2 &&
306  "Pipeline initiation interval hint metadata should have two operands.");
308  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
309  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
310  } else if (S->getString() == "llvm.loop.pipeline.disable") {
311  disabledByPragma = true;
312  }
313  }
314 }
315 
316 /// Return true if the loop can be software pipelined. The algorithm is
317 /// restricted to loops with a single basic block. Make sure that the
318 /// branch in the loop can be analyzed.
319 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
320  if (L.getNumBlocks() != 1) {
321  ORE->emit([&]() {
322  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
323  L.getStartLoc(), L.getHeader())
324  << "Not a single basic block: "
325  << ore::NV("NumBlocks", L.getNumBlocks());
326  });
327  return false;
328  }
329 
330  if (disabledByPragma) {
331  ORE->emit([&]() {
332  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
333  L.getStartLoc(), L.getHeader())
334  << "Disabled by Pragma.";
335  });
336  return false;
337  }
338 
339  // Check if the branch can't be understood because we can't do pipelining
340  // if that's the case.
341  LI.TBB = nullptr;
342  LI.FBB = nullptr;
343  LI.BrCond.clear();
344  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
345  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
346  NumFailBranch++;
347  ORE->emit([&]() {
348  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
349  L.getStartLoc(), L.getHeader())
350  << "The branch can't be understood";
351  });
352  return false;
353  }
354 
355  LI.LoopInductionVar = nullptr;
356  LI.LoopCompare = nullptr;
358  if (!LI.LoopPipelinerInfo) {
359  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
360  NumFailLoop++;
361  ORE->emit([&]() {
362  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
363  L.getStartLoc(), L.getHeader())
364  << "The loop structure is not supported";
365  });
366  return false;
367  }
368 
369  if (!L.getLoopPreheader()) {
370  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
371  NumFailPreheader++;
372  ORE->emit([&]() {
373  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
374  L.getStartLoc(), L.getHeader())
375  << "No loop preheader found";
376  });
377  return false;
378  }
379 
380  // Remove any subregisters from inputs to phi nodes.
381  preprocessPhiNodes(*L.getHeader());
382  return true;
383 }
384 
385 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
387  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
388 
389  for (MachineInstr &PI : B.phis()) {
390  MachineOperand &DefOp = PI.getOperand(0);
391  assert(DefOp.getSubReg() == 0);
392  auto *RC = MRI.getRegClass(DefOp.getReg());
393 
394  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
395  MachineOperand &RegOp = PI.getOperand(i);
396  if (RegOp.getSubReg() == 0)
397  continue;
398 
399  // If the operand uses a subregister, replace it with a new register
400  // without subregisters, and generate a copy to the new register.
401  Register NewReg = MRI.createVirtualRegister(RC);
402  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
404  const DebugLoc &DL = PredB.findDebugLoc(At);
405  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
406  .addReg(RegOp.getReg(), getRegState(RegOp),
407  RegOp.getSubReg());
408  Slots.insertMachineInstrInMaps(*Copy);
409  RegOp.setReg(NewReg);
410  RegOp.setSubReg(0);
411  }
412  }
413 }
414 
415 /// The SMS algorithm consists of the following main steps:
416 /// 1. Computation and analysis of the dependence graph.
417 /// 2. Ordering of the nodes (instructions).
418 /// 3. Attempt to Schedule the loop.
419 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
420  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
421 
422  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
424 
426  // The kernel should not include any terminator instructions. These
427  // will be added back later.
428  SMS.startBlock(MBB);
429 
430  // Compute the number of 'real' instructions in the basic block by
431  // ignoring terminators.
432  unsigned size = MBB->size();
434  E = MBB->instr_end();
435  I != E; ++I, --size)
436  ;
437 
438  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
439  SMS.schedule();
440  SMS.exitRegion();
441 
442  SMS.finishBlock();
443  return SMS.hasNewSchedule();
444 }
445 
454 }
455 
456 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
457  if (II_setByPragma > 0)
458  MII = II_setByPragma;
459  else
460  MII = std::max(ResMII, RecMII);
461 }
462 
463 void SwingSchedulerDAG::setMAX_II() {
464  if (II_setByPragma > 0)
465  MAX_II = II_setByPragma;
466  else
467  MAX_II = MII + 10;
468 }
469 
470 /// We override the schedule function in ScheduleDAGInstrs to implement the
471 /// scheduling part of the Swing Modulo Scheduling algorithm.
473  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
475  addLoopCarriedDependences(AA);
476  updatePhiDependences();
478  changeDependences();
479  postprocessDAG();
480  LLVM_DEBUG(dump());
481 
482  NodeSetType NodeSets;
483  findCircuits(NodeSets);
484  NodeSetType Circuits = NodeSets;
485 
486  // Calculate the MII.
487  unsigned ResMII = calculateResMII();
488  unsigned RecMII = calculateRecMII(NodeSets);
489 
490  fuseRecs(NodeSets);
491 
492  // This flag is used for testing and can cause correctness problems.
493  if (SwpIgnoreRecMII)
494  RecMII = 0;
495 
496  setMII(ResMII, RecMII);
497  setMAX_II();
498 
499  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
500  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
501 
502  // Can't schedule a loop without a valid MII.
503  if (MII == 0) {
504  LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
505  NumFailZeroMII++;
506  Pass.ORE->emit([&]() {
508  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
509  << "Invalid Minimal Initiation Interval: 0";
510  });
511  return;
512  }
513 
514  // Don't pipeline large loops.
515  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
516  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
517  << ", we don't pipeline large loops\n");
518  NumFailLargeMaxMII++;
519  Pass.ORE->emit([&]() {
521  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
522  << "Minimal Initiation Interval too large: "
523  << ore::NV("MII", (int)MII) << " > "
524  << ore::NV("SwpMaxMii", SwpMaxMii) << "."
525  << "Refer to -pipeliner-max-mii.";
526  });
527  return;
528  }
529 
530  computeNodeFunctions(NodeSets);
531 
532  registerPressureFilter(NodeSets);
533 
534  colocateNodeSets(NodeSets);
535 
536  checkNodeSets(NodeSets);
537 
538  LLVM_DEBUG({
539  for (auto &I : NodeSets) {
540  dbgs() << " Rec NodeSet ";
541  I.dump();
542  }
543  });
544 
545  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
546 
547  groupRemainingNodes(NodeSets);
548 
549  removeDuplicateNodes(NodeSets);
550 
551  LLVM_DEBUG({
552  for (auto &I : NodeSets) {
553  dbgs() << " NodeSet ";
554  I.dump();
555  }
556  });
557 
558  computeNodeOrder(NodeSets);
559 
560  // check for node order issues
561  checkValidNodeOrder(Circuits);
562 
563  SMSchedule Schedule(Pass.MF);
564  Scheduled = schedulePipeline(Schedule);
565 
566  if (!Scheduled){
567  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
568  NumFailNoSchedule++;
569  Pass.ORE->emit([&]() {
571  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
572  << "Unable to find schedule";
573  });
574  return;
575  }
576 
577  unsigned numStages = Schedule.getMaxStageCount();
578  // No need to generate pipeline if there are no overlapped iterations.
579  if (numStages == 0) {
580  LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
581  NumFailZeroStage++;
582  Pass.ORE->emit([&]() {
584  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
585  << "No need to pipeline - no overlapped iterations in schedule.";
586  });
587  return;
588  }
589  // Check that the maximum stage count is less than user-defined limit.
590  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
591  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
592  << " : too many stages, abort\n");
593  NumFailLargeMaxStage++;
594  Pass.ORE->emit([&]() {
596  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
597  << "Too many stages in schedule: "
598  << ore::NV("numStages", (int)numStages) << " > "
599  << ore::NV("SwpMaxStages", SwpMaxStages)
600  << ". Refer to -pipeliner-max-stages.";
601  });
602  return;
603  }
604 
605  Pass.ORE->emit([&]() {
607  Loop.getHeader())
608  << "Pipelined succesfully!";
609  });
610 
611  // Generate the schedule as a ModuloSchedule.
612  DenseMap<MachineInstr *, int> Cycles, Stages;
613  std::vector<MachineInstr *> OrderedInsts;
614  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
615  ++Cycle) {
616  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
617  OrderedInsts.push_back(SU->getInstr());
618  Cycles[SU->getInstr()] = Cycle;
619  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
620  }
621  }
623  for (auto &KV : NewMIs) {
624  Cycles[KV.first] = Cycles[KV.second];
625  Stages[KV.first] = Stages[KV.second];
626  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
627  }
628 
629  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
630  std::move(Stages));
631  if (EmitTestAnnotations) {
632  assert(NewInstrChanges.empty() &&
633  "Cannot serialize a schedule with InstrChanges!");
635  MSTI.annotate();
636  return;
637  }
638  // The experimental code generator can't work if there are InstChanges.
639  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
640  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
641  MSE.expand();
642  } else {
643  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
644  MSE.expand();
645  MSE.cleanup();
646  }
647  ++NumPipelined;
648 }
649 
650 /// Clean up after the software pipeliner runs.
652  for (auto &KV : NewMIs)
653  MF.deleteMachineInstr(KV.second);
654  NewMIs.clear();
655 
656  // Call the superclass.
658 }
659 
660 /// Return the register values for the operands of a Phi instruction.
661 /// This function assume the instruction is a Phi.
663  unsigned &InitVal, unsigned &LoopVal) {
664  assert(Phi.isPHI() && "Expecting a Phi.");
665 
666  InitVal = 0;
667  LoopVal = 0;
668  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
669  if (Phi.getOperand(i + 1).getMBB() != Loop)
670  InitVal = Phi.getOperand(i).getReg();
671  else
672  LoopVal = Phi.getOperand(i).getReg();
673 
674  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
675 }
676 
677 /// Return the Phi register value that comes the loop block.
678 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
679  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
680  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
681  return Phi.getOperand(i).getReg();
682  return 0;
683 }
684 
685 /// Return true if SUb can be reached from SUa following the chain edges.
686 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
687  SmallPtrSet<SUnit *, 8> Visited;
688  SmallVector<SUnit *, 8> Worklist;
689  Worklist.push_back(SUa);
690  while (!Worklist.empty()) {
691  const SUnit *SU = Worklist.pop_back_val();
692  for (auto &SI : SU->Succs) {
693  SUnit *SuccSU = SI.getSUnit();
694  if (SI.getKind() == SDep::Order) {
695  if (Visited.count(SuccSU))
696  continue;
697  if (SuccSU == SUb)
698  return true;
699  Worklist.push_back(SuccSU);
700  Visited.insert(SuccSU);
701  }
702  }
703  }
704  return false;
705 }
706 
707 /// Return true if the instruction causes a chain between memory
708 /// references before and after it.
710  return MI.isCall() || MI.mayRaiseFPException() ||
711  MI.hasUnmodeledSideEffects() ||
712  (MI.hasOrderedMemoryRef() &&
713  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
714 }
715 
716 /// Return the underlying objects for the memory references of an instruction.
717 /// This function calls the code in ValueTracking, but first checks that the
718 /// instruction has a memory operand.
721  if (!MI->hasOneMemOperand())
722  return;
723  MachineMemOperand *MM = *MI->memoperands_begin();
724  if (!MM->getValue())
725  return;
726  getUnderlyingObjects(MM->getValue(), Objs);
727  for (const Value *V : Objs) {
728  if (!isIdentifiedObject(V)) {
729  Objs.clear();
730  return;
731  }
732  Objs.push_back(V);
733  }
734 }
735 
736 /// Add a chain edge between a load and store if the store can be an
737 /// alias of the load on a subsequent iteration, i.e., a loop carried
738 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
739 /// but that code doesn't create loop carried dependences.
740 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
744  for (auto &SU : SUnits) {
745  MachineInstr &MI = *SU.getInstr();
746  if (isDependenceBarrier(MI, AA))
747  PendingLoads.clear();
748  else if (MI.mayLoad()) {
750  ::getUnderlyingObjects(&MI, Objs);
751  if (Objs.empty())
752  Objs.push_back(UnknownValue);
753  for (auto V : Objs) {
754  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
755  SUs.push_back(&SU);
756  }
757  } else if (MI.mayStore()) {
759  ::getUnderlyingObjects(&MI, Objs);
760  if (Objs.empty())
761  Objs.push_back(UnknownValue);
762  for (auto V : Objs) {
764  PendingLoads.find(V);
765  if (I == PendingLoads.end())
766  continue;
767  for (auto Load : I->second) {
768  if (isSuccOrder(Load, &SU))
769  continue;
770  MachineInstr &LdMI = *Load->getInstr();
771  // First, perform the cheaper check that compares the base register.
772  // If they are the same and the load offset is less than the store
773  // offset, then mark the dependence as loop carried potentially.
774  const MachineOperand *BaseOp1, *BaseOp2;
775  int64_t Offset1, Offset2;
776  bool Offset1IsScalable, Offset2IsScalable;
777  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
778  Offset1IsScalable, TRI) &&
779  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
780  Offset2IsScalable, TRI)) {
781  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
782  Offset1IsScalable == Offset2IsScalable &&
783  (int)Offset1 < (int)Offset2) {
785  "What happened to the chain edge?");
786  SDep Dep(Load, SDep::Barrier);
787  Dep.setLatency(1);
788  SU.addPred(Dep);
789  continue;
790  }
791  }
792  // Second, the more expensive check that uses alias analysis on the
793  // base registers. If they alias, and the load offset is less than
794  // the store offset, the mark the dependence as loop carried.
795  if (!AA) {
796  SDep Dep(Load, SDep::Barrier);
797  Dep.setLatency(1);
798  SU.addPred(Dep);
799  continue;
800  }
801  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
802  MachineMemOperand *MMO2 = *MI.memoperands_begin();
803  if (!MMO1->getValue() || !MMO2->getValue()) {
804  SDep Dep(Load, SDep::Barrier);
805  Dep.setLatency(1);
806  SU.addPred(Dep);
807  continue;
808  }
809  if (MMO1->getValue() == MMO2->getValue() &&
810  MMO1->getOffset() <= MMO2->getOffset()) {
811  SDep Dep(Load, SDep::Barrier);
812  Dep.setLatency(1);
813  SU.addPred(Dep);
814  continue;
815  }
816  if (!AA->isNoAlias(
817  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
819  MMO2->getAAInfo()))) {
820  SDep Dep(Load, SDep::Barrier);
821  Dep.setLatency(1);
822  SU.addPred(Dep);
823  }
824  }
825  }
826  }
827  }
828 }
829 
830 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
831 /// processes dependences for PHIs. This function adds true dependences
832 /// from a PHI to a use, and a loop carried dependence from the use to the
833 /// PHI. The loop carried dependence is represented as an anti dependence
834 /// edge. This function also removes chain dependences between unrelated
835 /// PHIs.
836 void SwingSchedulerDAG::updatePhiDependences() {
837  SmallVector<SDep, 4> RemoveDeps;
839 
840  // Iterate over each DAG node.
841  for (SUnit &I : SUnits) {
842  RemoveDeps.clear();
843  // Set to true if the instruction has an operand defined by a Phi.
844  unsigned HasPhiUse = 0;
845  unsigned HasPhiDef = 0;
846  MachineInstr *MI = I.getInstr();
847  // Iterate over each operand, and we process the definitions.
848  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
849  MOE = MI->operands_end();
850  MOI != MOE; ++MOI) {
851  if (!MOI->isReg())
852  continue;
853  Register Reg = MOI->getReg();
854  if (MOI->isDef()) {
855  // If the register is used by a Phi, then create an anti dependence.
857  UI = MRI.use_instr_begin(Reg),
858  UE = MRI.use_instr_end();
859  UI != UE; ++UI) {
860  MachineInstr *UseMI = &*UI;
861  SUnit *SU = getSUnit(UseMI);
862  if (SU != nullptr && UseMI->isPHI()) {
863  if (!MI->isPHI()) {
864  SDep Dep(SU, SDep::Anti, Reg);
865  Dep.setLatency(1);
866  I.addPred(Dep);
867  } else {
868  HasPhiDef = Reg;
869  // Add a chain edge to a dependent Phi that isn't an existing
870  // predecessor.
871  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
872  I.addPred(SDep(SU, SDep::Barrier));
873  }
874  }
875  }
876  } else if (MOI->isUse()) {
877  // If the register is defined by a Phi, then create a true dependence.
879  if (DefMI == nullptr)
880  continue;
881  SUnit *SU = getSUnit(DefMI);
882  if (SU != nullptr && DefMI->isPHI()) {
883  if (!MI->isPHI()) {
884  SDep Dep(SU, SDep::Data, Reg);
885  Dep.setLatency(0);
886  ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
887  I.addPred(Dep);
888  } else {
889  HasPhiUse = Reg;
890  // Add a chain edge to a dependent Phi that isn't an existing
891  // predecessor.
892  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
893  I.addPred(SDep(SU, SDep::Barrier));
894  }
895  }
896  }
897  }
898  // Remove order dependences from an unrelated Phi.
899  if (!SwpPruneDeps)
900  continue;
901  for (auto &PI : I.Preds) {
902  MachineInstr *PMI = PI.getSUnit()->getInstr();
903  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
904  if (I.getInstr()->isPHI()) {
905  if (PMI->getOperand(0).getReg() == HasPhiUse)
906  continue;
907  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
908  continue;
909  }
910  RemoveDeps.push_back(PI);
911  }
912  }
913  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
914  I.removePred(RemoveDeps[i]);
915  }
916 }
917 
918 /// Iterate over each DAG node and see if we can change any dependences
919 /// in order to reduce the recurrence MII.
920 void SwingSchedulerDAG::changeDependences() {
921  // See if an instruction can use a value from the previous iteration.
922  // If so, we update the base and offset of the instruction and change
923  // the dependences.
924  for (SUnit &I : SUnits) {
925  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
926  int64_t NewOffset = 0;
927  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
928  NewOffset))
929  continue;
930 
931  // Get the MI and SUnit for the instruction that defines the original base.
932  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
933  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
934  if (!DefMI)
935  continue;
936  SUnit *DefSU = getSUnit(DefMI);
937  if (!DefSU)
938  continue;
939  // Get the MI and SUnit for the instruction that defins the new base.
940  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
941  if (!LastMI)
942  continue;
943  SUnit *LastSU = getSUnit(LastMI);
944  if (!LastSU)
945  continue;
946 
947  if (Topo.IsReachable(&I, LastSU))
948  continue;
949 
950  // Remove the dependence. The value now depends on a prior iteration.
952  for (const SDep &P : I.Preds)
953  if (P.getSUnit() == DefSU)
954  Deps.push_back(P);
955  for (int i = 0, e = Deps.size(); i != e; i++) {
956  Topo.RemovePred(&I, Deps[i].getSUnit());
957  I.removePred(Deps[i]);
958  }
959  // Remove the chain dependence between the instructions.
960  Deps.clear();
961  for (auto &P : LastSU->Preds)
962  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
963  Deps.push_back(P);
964  for (int i = 0, e = Deps.size(); i != e; i++) {
965  Topo.RemovePred(LastSU, Deps[i].getSUnit());
966  LastSU->removePred(Deps[i]);
967  }
968 
969  // Add a dependence between the new instruction and the instruction
970  // that defines the new base.
971  SDep Dep(&I, SDep::Anti, NewBase);
972  Topo.AddPred(LastSU, &I);
973  LastSU->addPred(Dep);
974 
975  // Remember the base and offset information so that we can update the
976  // instruction during code generation.
977  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
978  }
979 }
980 
981 namespace {
982 
983 // FuncUnitSorter - Comparison operator used to sort instructions by
984 // the number of functional unit choices.
985 struct FuncUnitSorter {
986  const InstrItineraryData *InstrItins;
987  const MCSubtargetInfo *STI;
989 
990  FuncUnitSorter(const TargetSubtargetInfo &TSI)
991  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
992 
993  // Compute the number of functional unit alternatives needed
994  // at each stage, and take the minimum value. We prioritize the
995  // instructions by the least number of choices first.
996  unsigned minFuncUnits(const MachineInstr *Inst,
997  InstrStage::FuncUnits &F) const {
998  unsigned SchedClass = Inst->getDesc().getSchedClass();
999  unsigned min = UINT_MAX;
1000  if (InstrItins && !InstrItins->isEmpty()) {
1001  for (const InstrStage &IS :
1002  make_range(InstrItins->beginStage(SchedClass),
1003  InstrItins->endStage(SchedClass))) {
1004  InstrStage::FuncUnits funcUnits = IS.getUnits();
1005  unsigned numAlternatives = countPopulation(funcUnits);
1006  if (numAlternatives < min) {
1007  min = numAlternatives;
1008  F = funcUnits;
1009  }
1010  }
1011  return min;
1012  }
1013  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1014  const MCSchedClassDesc *SCDesc =
1015  STI->getSchedModel().getSchedClassDesc(SchedClass);
1016  if (!SCDesc->isValid())
1017  // No valid Schedule Class Desc for schedClass, should be
1018  // Pseudo/PostRAPseudo
1019  return min;
1020 
1021  for (const MCWriteProcResEntry &PRE :
1022  make_range(STI->getWriteProcResBegin(SCDesc),
1023  STI->getWriteProcResEnd(SCDesc))) {
1024  if (!PRE.Cycles)
1025  continue;
1026  const MCProcResourceDesc *ProcResource =
1027  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1028  unsigned NumUnits = ProcResource->NumUnits;
1029  if (NumUnits < min) {
1030  min = NumUnits;
1031  F = PRE.ProcResourceIdx;
1032  }
1033  }
1034  return min;
1035  }
1036  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1037  }
1038 
1039  // Compute the critical resources needed by the instruction. This
1040  // function records the functional units needed by instructions that
1041  // must use only one functional unit. We use this as a tie breaker
1042  // for computing the resource MII. The instrutions that require
1043  // the same, highly used, functional unit have high priority.
1044  void calcCriticalResources(MachineInstr &MI) {
1045  unsigned SchedClass = MI.getDesc().getSchedClass();
1046  if (InstrItins && !InstrItins->isEmpty()) {
1047  for (const InstrStage &IS :
1048  make_range(InstrItins->beginStage(SchedClass),
1049  InstrItins->endStage(SchedClass))) {
1050  InstrStage::FuncUnits FuncUnits = IS.getUnits();
1051  if (countPopulation(FuncUnits) == 1)
1052  Resources[FuncUnits]++;
1053  }
1054  return;
1055  }
1056  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1057  const MCSchedClassDesc *SCDesc =
1058  STI->getSchedModel().getSchedClassDesc(SchedClass);
1059  if (!SCDesc->isValid())
1060  // No valid Schedule Class Desc for schedClass, should be
1061  // Pseudo/PostRAPseudo
1062  return;
1063 
1064  for (const MCWriteProcResEntry &PRE :
1065  make_range(STI->getWriteProcResBegin(SCDesc),
1066  STI->getWriteProcResEnd(SCDesc))) {
1067  if (!PRE.Cycles)
1068  continue;
1069  Resources[PRE.ProcResourceIdx]++;
1070  }
1071  return;
1072  }
1073  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1074  }
1075 
1076  /// Return true if IS1 has less priority than IS2.
1077  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1078  InstrStage::FuncUnits F1 = 0, F2 = 0;
1079  unsigned MFUs1 = minFuncUnits(IS1, F1);
1080  unsigned MFUs2 = minFuncUnits(IS2, F2);
1081  if (MFUs1 == MFUs2)
1082  return Resources.lookup(F1) < Resources.lookup(F2);
1083  return MFUs1 > MFUs2;
1084  }
1085 };
1086 
1087 } // end anonymous namespace
1088 
1089 /// Calculate the resource constrained minimum initiation interval for the
1090 /// specified loop. We use the DFA to model the resources needed for
1091 /// each instruction, and we ignore dependences. A different DFA is created
1092 /// for each cycle that is required. When adding a new instruction, we attempt
1093 /// to add it to each existing DFA, until a legal space is found. If the
1094 /// instruction cannot be reserved in an existing DFA, we create a new one.
1095 unsigned SwingSchedulerDAG::calculateResMII() {
1096 
1097  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1100  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1101 
1102  // Sort the instructions by the number of available choices for scheduling,
1103  // least to most. Use the number of critical resources as the tie breaker.
1104  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1105  for (MachineInstr &MI :
1107  FUS.calcCriticalResources(MI);
1109  FuncUnitOrder(FUS);
1110 
1111  for (MachineInstr &MI :
1113  FuncUnitOrder.push(&MI);
1114 
1115  while (!FuncUnitOrder.empty()) {
1116  MachineInstr *MI = FuncUnitOrder.top();
1117  FuncUnitOrder.pop();
1118  if (TII->isZeroCost(MI->getOpcode()))
1119  continue;
1120  // Attempt to reserve the instruction in an existing DFA. At least one
1121  // DFA is needed for each cycle.
1122  unsigned NumCycles = getSUnit(MI)->Latency;
1123  unsigned ReservedCycles = 0;
1124  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1125  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1126  LLVM_DEBUG({
1127  dbgs() << "Trying to reserve resource for " << NumCycles
1128  << " cycles for \n";
1129  MI->dump();
1130  });
1131  for (unsigned C = 0; C < NumCycles; ++C)
1132  while (RI != RE) {
1133  if ((*RI)->canReserveResources(*MI)) {
1134  (*RI)->reserveResources(*MI);
1135  ++ReservedCycles;
1136  break;
1137  }
1138  RI++;
1139  }
1140  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1141  << ", NumCycles:" << NumCycles << "\n");
1142  // Add new DFAs, if needed, to reserve resources.
1143  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1145  << "NewResource created to reserve resources"
1146  << "\n");
1147  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1148  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1149  NewResource->reserveResources(*MI);
1150  Resources.push_back(NewResource);
1151  }
1152  }
1153  int Resmii = Resources.size();
1154  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
1155  // Delete the memory for each of the DFAs that were created earlier.
1156  for (ResourceManager *RI : Resources) {
1157  ResourceManager *D = RI;
1158  delete D;
1159  }
1160  Resources.clear();
1161  return Resmii;
1162 }
1163 
1164 /// Calculate the recurrence-constrainted minimum initiation interval.
1165 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1166 /// for each circuit. The II needs to satisfy the inequality
1167 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1168 /// II that satisfies the inequality, and the RecMII is the maximum
1169 /// of those values.
1170 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1171  unsigned RecMII = 0;
1172 
1173  for (NodeSet &Nodes : NodeSets) {
1174  if (Nodes.empty())
1175  continue;
1176 
1177  unsigned Delay = Nodes.getLatency();
1178  unsigned Distance = 1;
1179 
1180  // ii = ceil(delay / distance)
1181  unsigned CurMII = (Delay + Distance - 1) / Distance;
1182  Nodes.setRecMII(CurMII);
1183  if (CurMII > RecMII)
1184  RecMII = CurMII;
1185  }
1186 
1187  return RecMII;
1188 }
1189 
1190 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1191 /// but we do this to find the circuits, and then change them back.
1192 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1194  for (SUnit &SU : SUnits) {
1195  for (SDep &Pred : SU.Preds)
1196  if (Pred.getKind() == SDep::Anti)
1197  DepsAdded.push_back(std::make_pair(&SU, Pred));
1198  }
1199  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1200  // Remove this anti dependency and add one in the reverse direction.
1201  SUnit *SU = P.first;
1202  SDep &D = P.second;
1203  SUnit *TargetSU = D.getSUnit();
1204  unsigned Reg = D.getReg();
1205  unsigned Lat = D.getLatency();
1206  SU->removePred(D);
1207  SDep Dep(SU, SDep::Anti, Reg);
1208  Dep.setLatency(Lat);
1209  TargetSU->addPred(Dep);
1210  }
1211 }
1212 
1213 /// Create the adjacency structure of the nodes in the graph.
1214 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1215  SwingSchedulerDAG *DAG) {
1216  BitVector Added(SUnits.size());
1217  DenseMap<int, int> OutputDeps;
1218  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1219  Added.reset();
1220  // Add any successor to the adjacency matrix and exclude duplicates.
1221  for (auto &SI : SUnits[i].Succs) {
1222  // Only create a back-edge on the first and last nodes of a dependence
1223  // chain. This records any chains and adds them later.
1224  if (SI.getKind() == SDep::Output) {
1225  int N = SI.getSUnit()->NodeNum;
1226  int BackEdge = i;
1227  auto Dep = OutputDeps.find(BackEdge);
1228  if (Dep != OutputDeps.end()) {
1229  BackEdge = Dep->second;
1230  OutputDeps.erase(Dep);
1231  }
1232  OutputDeps[N] = BackEdge;
1233  }
1234  // Do not process a boundary node, an artificial node.
1235  // A back-edge is processed only if it goes to a Phi.
1236  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1237  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1238  continue;
1239  int N = SI.getSUnit()->NodeNum;
1240  if (!Added.test(N)) {
1241  AdjK[i].push_back(N);
1242  Added.set(N);
1243  }
1244  }
1245  // A chain edge between a store and a load is treated as a back-edge in the
1246  // adjacency matrix.
1247  for (auto &PI : SUnits[i].Preds) {
1248  if (!SUnits[i].getInstr()->mayStore() ||
1249  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1250  continue;
1251  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1252  int N = PI.getSUnit()->NodeNum;
1253  if (!Added.test(N)) {
1254  AdjK[i].push_back(N);
1255  Added.set(N);
1256  }
1257  }
1258  }
1259  }
1260  // Add back-edges in the adjacency matrix for the output dependences.
1261  for (auto &OD : OutputDeps)
1262  if (!Added.test(OD.second)) {
1263  AdjK[OD.first].push_back(OD.second);
1264  Added.set(OD.second);
1265  }
1266 }
1267 
1268 /// Identify an elementary circuit in the dependence graph starting at the
1269 /// specified node.
1270 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1271  bool HasBackedge) {
1272  SUnit *SV = &SUnits[V];
1273  bool F = false;
1274  Stack.insert(SV);
1275  Blocked.set(V);
1276 
1277  for (auto W : AdjK[V]) {
1278  if (NumPaths > MaxPaths)
1279  break;
1280  if (W < S)
1281  continue;
1282  if (W == S) {
1283  if (!HasBackedge)
1284  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1285  F = true;
1286  ++NumPaths;
1287  break;
1288  } else if (!Blocked.test(W)) {
1289  if (circuit(W, S, NodeSets,
1290  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1291  F = true;
1292  }
1293  }
1294 
1295  if (F)
1296  unblock(V);
1297  else {
1298  for (auto W : AdjK[V]) {
1299  if (W < S)
1300  continue;
1301  B[W].insert(SV);
1302  }
1303  }
1304  Stack.pop_back();
1305  return F;
1306 }
1307 
1308 /// Unblock a node in the circuit finding algorithm.
1309 void SwingSchedulerDAG::Circuits::unblock(int U) {
1310  Blocked.reset(U);
1311  SmallPtrSet<SUnit *, 4> &BU = B[U];
1312  while (!BU.empty()) {
1314  assert(SI != BU.end() && "Invalid B set.");
1315  SUnit *W = *SI;
1316  BU.erase(W);
1317  if (Blocked.test(W->NodeNum))
1318  unblock(W->NodeNum);
1319  }
1320 }
1321 
1322 /// Identify all the elementary circuits in the dependence graph using
1323 /// Johnson's circuit algorithm.
1324 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1325  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1326  // but we do this to find the circuits, and then change them back.
1328 
1329  Circuits Cir(SUnits, Topo);
1330  // Create the adjacency structure.
1331  Cir.createAdjacencyStructure(this);
1332  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1333  Cir.reset();
1334  Cir.circuit(i, i, NodeSets);
1335  }
1336 
1337  // Change the dependences back so that we've created a DAG again.
1339 }
1340 
1341 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1342 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1343 // additional copies that are needed across iterations. An artificial dependence
1344 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1345 
1346 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1347 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1348 // PHI-------True-Dep------> USEOfPhi
1349 
1350 // The mutation creates
1351 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1352 
1353 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1354 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1355 // late to avoid additional copies across iterations. The possible scheduling
1356 // order would be
1357 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1358 
1360  for (SUnit &SU : DAG->SUnits) {
1361  // Find the COPY/REG_SEQUENCE instruction.
1362  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1363  continue;
1364 
1365  // Record the loop carried PHIs.
1366  SmallVector<SUnit *, 4> PHISUs;
1367  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1368  SmallVector<SUnit *, 4> SrcSUs;
1369 
1370  for (auto &Dep : SU.Preds) {
1371  SUnit *TmpSU = Dep.getSUnit();
1372  MachineInstr *TmpMI = TmpSU->getInstr();
1373  SDep::Kind DepKind = Dep.getKind();
1374  // Save the loop carried PHI.
1375  if (DepKind == SDep::Anti && TmpMI->isPHI())
1376  PHISUs.push_back(TmpSU);
1377  // Save the source of COPY/REG_SEQUENCE.
1378  // If the source has no pre-decessors, we will end up creating cycles.
1379  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1380  SrcSUs.push_back(TmpSU);
1381  }
1382 
1383  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1384  continue;
1385 
1386  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1387  // SUnit to the container.
1388  SmallVector<SUnit *, 8> UseSUs;
1389  // Do not use iterator based loop here as we are updating the container.
1390  for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1391  for (auto &Dep : PHISUs[Index]->Succs) {
1392  if (Dep.getKind() != SDep::Data)
1393  continue;
1394 
1395  SUnit *TmpSU = Dep.getSUnit();
1396  MachineInstr *TmpMI = TmpSU->getInstr();
1397  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1398  PHISUs.push_back(TmpSU);
1399  continue;
1400  }
1401  UseSUs.push_back(TmpSU);
1402  }
1403  }
1404 
1405  if (UseSUs.size() == 0)
1406  continue;
1407 
1408  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1409  // Add the artificial dependencies if it does not form a cycle.
1410  for (auto I : UseSUs) {
1411  for (auto Src : SrcSUs) {
1412  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1413  Src->addPred(SDep(I, SDep::Artificial));
1414  SDAG->Topo.AddPred(Src, I);
1415  }
1416  }
1417  }
1418  }
1419 }
1420 
1421 /// Return true for DAG nodes that we ignore when computing the cost functions.
1422 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1423 /// in the calculation of the ASAP, ALAP, etc functions.
1424 static bool ignoreDependence(const SDep &D, bool isPred) {
1425  if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
1426  return true;
1427  return D.getKind() == SDep::Anti && isPred;
1428 }
1429 
1430 /// Compute several functions need to order the nodes for scheduling.
1431 /// ASAP - Earliest time to schedule a node.
1432 /// ALAP - Latest time to schedule a node.
1433 /// MOV - Mobility function, difference between ALAP and ASAP.
1434 /// D - Depth of each node.
1435 /// H - Height of each node.
1436 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1437  ScheduleInfo.resize(SUnits.size());
1438 
1439  LLVM_DEBUG({
1440  for (int I : Topo) {
1441  const SUnit &SU = SUnits[I];
1442  dumpNode(SU);
1443  }
1444  });
1445 
1446  int maxASAP = 0;
1447  // Compute ASAP and ZeroLatencyDepth.
1448  for (int I : Topo) {
1449  int asap = 0;
1450  int zeroLatencyDepth = 0;
1451  SUnit *SU = &SUnits[I];
1452  for (const SDep &P : SU->Preds) {
1453  SUnit *pred = P.getSUnit();
1454  if (P.getLatency() == 0)
1455  zeroLatencyDepth =
1456  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1457  if (ignoreDependence(P, true))
1458  continue;
1459  asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
1460  getDistance(pred, SU, P) * MII));
1461  }
1462  maxASAP = std::max(maxASAP, asap);
1463  ScheduleInfo[I].ASAP = asap;
1464  ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1465  }
1466 
1467  // Compute ALAP, ZeroLatencyHeight, and MOV.
1468  for (int I : llvm::reverse(Topo)) {
1469  int alap = maxASAP;
1470  int zeroLatencyHeight = 0;
1471  SUnit *SU = &SUnits[I];
1472  for (const SDep &S : SU->Succs) {
1473  SUnit *succ = S.getSUnit();
1474  if (succ->isBoundaryNode())
1475  continue;
1476  if (S.getLatency() == 0)
1477  zeroLatencyHeight =
1478  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1479  if (ignoreDependence(S, true))
1480  continue;
1481  alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
1482  getDistance(SU, succ, S) * MII));
1483  }
1484 
1485  ScheduleInfo[I].ALAP = alap;
1486  ScheduleInfo[I].ZeroLatencyHeight = zeroLatencyHeight;
1487  }
1488 
1489  // After computing the node functions, compute the summary for each node set.
1490  for (NodeSet &I : NodeSets)
1491  I.computeNodeSetInfo(this);
1492 
1493  LLVM_DEBUG({
1494  for (unsigned i = 0; i < SUnits.size(); i++) {
1495  dbgs() << "\tNode " << i << ":\n";
1496  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1497  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1498  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1499  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1500  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1501  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1502  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1503  }
1504  });
1505 }
1506 
1507 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1508 /// as the predecessors of the elements of NodeOrder that are not also in
1509 /// NodeOrder.
1512  const NodeSet *S = nullptr) {
1513  Preds.clear();
1514  for (const SUnit *SU : NodeOrder) {
1515  for (const SDep &Pred : SU->Preds) {
1516  if (S && S->count(Pred.getSUnit()) == 0)
1517  continue;
1518  if (ignoreDependence(Pred, true))
1519  continue;
1520  if (NodeOrder.count(Pred.getSUnit()) == 0)
1521  Preds.insert(Pred.getSUnit());
1522  }
1523  // Back-edges are predecessors with an anti-dependence.
1524  for (const SDep &Succ : SU->Succs) {
1525  if (Succ.getKind() != SDep::Anti)
1526  continue;
1527  if (S && S->count(Succ.getSUnit()) == 0)
1528  continue;
1529  if (NodeOrder.count(Succ.getSUnit()) == 0)
1530  Preds.insert(Succ.getSUnit());
1531  }
1532  }
1533  return !Preds.empty();
1534 }
1535 
1536 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1537 /// as the successors of the elements of NodeOrder that are not also in
1538 /// NodeOrder.
1541  const NodeSet *S = nullptr) {
1542  Succs.clear();
1543  for (const SUnit *SU : NodeOrder) {
1544  for (const SDep &Succ : SU->Succs) {
1545  if (S && S->count(Succ.getSUnit()) == 0)
1546  continue;
1547  if (ignoreDependence(Succ, false))
1548  continue;
1549  if (NodeOrder.count(Succ.getSUnit()) == 0)
1550  Succs.insert(Succ.getSUnit());
1551  }
1552  for (const SDep &Pred : SU->Preds) {
1553  if (Pred.getKind() != SDep::Anti)
1554  continue;
1555  if (S && S->count(Pred.getSUnit()) == 0)
1556  continue;
1557  if (NodeOrder.count(Pred.getSUnit()) == 0)
1558  Succs.insert(Pred.getSUnit());
1559  }
1560  }
1561  return !Succs.empty();
1562 }
1563 
1564 /// Return true if there is a path from the specified node to any of the nodes
1565 /// in DestNodes. Keep track and return the nodes in any path.
1566 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1567  SetVector<SUnit *> &DestNodes,
1568  SetVector<SUnit *> &Exclude,
1569  SmallPtrSet<SUnit *, 8> &Visited) {
1570  if (Cur->isBoundaryNode())
1571  return false;
1572  if (Exclude.contains(Cur))
1573  return false;
1574  if (DestNodes.contains(Cur))
1575  return true;
1576  if (!Visited.insert(Cur).second)
1577  return Path.contains(Cur);
1578  bool FoundPath = false;
1579  for (auto &SI : Cur->Succs)
1580  if (!ignoreDependence(SI, false))
1581  FoundPath |=
1582  computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1583  for (auto &PI : Cur->Preds)
1584  if (PI.getKind() == SDep::Anti)
1585  FoundPath |=
1586  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1587  if (FoundPath)
1588  Path.insert(Cur);
1589  return FoundPath;
1590 }
1591 
1592 /// Compute the live-out registers for the instructions in a node-set.
1593 /// The live-out registers are those that are defined in the node-set,
1594 /// but not used. Except for use operands of Phis.
1596  NodeSet &NS) {
1601  for (SUnit *SU : NS) {
1602  const MachineInstr *MI = SU->getInstr();
1603  if (MI->isPHI())
1604  continue;
1605  for (const MachineOperand &MO : MI->operands())
1606  if (MO.isReg() && MO.isUse()) {
1607  Register Reg = MO.getReg();
1609  Uses.insert(Reg);
1610  else if (MRI.isAllocatable(Reg))
1611  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1612  ++Units)
1613  Uses.insert(*Units);
1614  }
1615  }
1616  for (SUnit *SU : NS)
1617  for (const MachineOperand &MO : SU->getInstr()->operands())
1618  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1619  Register Reg = MO.getReg();
1621  if (!Uses.count(Reg))
1622  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1624  } else if (MRI.isAllocatable(Reg)) {
1625  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1626  ++Units)
1627  if (!Uses.count(*Units))
1628  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1630  }
1631  }
1632  RPTracker.addLiveRegs(LiveOutRegs);
1633 }
1634 
1635 /// A heuristic to filter nodes in recurrent node-sets if the register
1636 /// pressure of a set is too high.
1637 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1638  for (auto &NS : NodeSets) {
1639  // Skip small node-sets since they won't cause register pressure problems.
1640  if (NS.size() <= 2)
1641  continue;
1642  IntervalPressure RecRegPressure;
1643  RegPressureTracker RecRPTracker(RecRegPressure);
1644  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1645  computeLiveOuts(MF, RecRPTracker, NS);
1646  RecRPTracker.closeBottom();
1647 
1648  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1649  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1650  return A->NodeNum > B->NodeNum;
1651  });
1652 
1653  for (auto &SU : SUnits) {
1654  // Since we're computing the register pressure for a subset of the
1655  // instructions in a block, we need to set the tracker for each
1656  // instruction in the node-set. The tracker is set to the instruction
1657  // just after the one we're interested in.
1659  RecRPTracker.setPos(std::next(CurInstI));
1660 
1661  RegPressureDelta RPDelta;
1662  ArrayRef<PressureChange> CriticalPSets;
1663  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1664  CriticalPSets,
1665  RecRegPressure.MaxSetPressure);
1666  if (RPDelta.Excess.isValid()) {
1667  LLVM_DEBUG(
1668  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1669  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1670  << ":" << RPDelta.Excess.getUnitInc() << "\n");
1671  NS.setExceedPressure(SU);
1672  break;
1673  }
1674  RecRPTracker.recede();
1675  }
1676  }
1677 }
1678 
1679 /// A heuristic to colocate node sets that have the same set of
1680 /// successors.
1681 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1682  unsigned Colocate = 0;
1683  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1684  NodeSet &N1 = NodeSets[i];
1686  if (N1.empty() || !succ_L(N1, S1))
1687  continue;
1688  for (int j = i + 1; j < e; ++j) {
1689  NodeSet &N2 = NodeSets[j];
1690  if (N1.compareRecMII(N2) != 0)
1691  continue;
1693  if (N2.empty() || !succ_L(N2, S2))
1694  continue;
1695  if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
1696  N1.setColocate(++Colocate);
1697  N2.setColocate(Colocate);
1698  break;
1699  }
1700  }
1701  }
1702 }
1703 
1704 /// Check if the existing node-sets are profitable. If not, then ignore the
1705 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1706 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1707 /// then it's best to try to schedule all instructions together instead of
1708 /// starting with the recurrent node-sets.
1709 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1710  // Look for loops with a large MII.
1711  if (MII < 17)
1712  return;
1713  // Check if the node-set contains only a simple add recurrence.
1714  for (auto &NS : NodeSets) {
1715  if (NS.getRecMII() > 2)
1716  return;
1717  if (NS.getMaxDepth() > MII)
1718  return;
1719  }
1720  NodeSets.clear();
1721  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1722 }
1723 
1724 /// Add the nodes that do not belong to a recurrence set into groups
1725 /// based upon connected components.
1726 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1727  SetVector<SUnit *> NodesAdded;
1728  SmallPtrSet<SUnit *, 8> Visited;
1729  // Add the nodes that are on a path between the previous node sets and
1730  // the current node set.
1731  for (NodeSet &I : NodeSets) {
1733  // Add the nodes from the current node set to the previous node set.
1734  if (succ_L(I, N)) {
1736  for (SUnit *NI : N) {
1737  Visited.clear();
1738  computePath(NI, Path, NodesAdded, I, Visited);
1739  }
1740  if (!Path.empty())
1741  I.insert(Path.begin(), Path.end());
1742  }
1743  // Add the nodes from the previous node set to the current node set.
1744  N.clear();
1745  if (succ_L(NodesAdded, N)) {
1747  for (SUnit *NI : N) {
1748  Visited.clear();
1749  computePath(NI, Path, I, NodesAdded, Visited);
1750  }
1751  if (!Path.empty())
1752  I.insert(Path.begin(), Path.end());
1753  }
1754  NodesAdded.insert(I.begin(), I.end());
1755  }
1756 
1757  // Create a new node set with the connected nodes of any successor of a node
1758  // in a recurrent set.
1759  NodeSet NewSet;
1761  if (succ_L(NodesAdded, N))
1762  for (SUnit *I : N)
1763  addConnectedNodes(I, NewSet, NodesAdded);
1764  if (!NewSet.empty())
1765  NodeSets.push_back(NewSet);
1766 
1767  // Create a new node set with the connected nodes of any predecessor of a node
1768  // in a recurrent set.
1769  NewSet.clear();
1770  if (pred_L(NodesAdded, N))
1771  for (SUnit *I : N)
1772  addConnectedNodes(I, NewSet, NodesAdded);
1773  if (!NewSet.empty())
1774  NodeSets.push_back(NewSet);
1775 
1776  // Create new nodes sets with the connected nodes any remaining node that
1777  // has no predecessor.
1778  for (SUnit &SU : SUnits) {
1779  if (NodesAdded.count(&SU) == 0) {
1780  NewSet.clear();
1781  addConnectedNodes(&SU, NewSet, NodesAdded);
1782  if (!NewSet.empty())
1783  NodeSets.push_back(NewSet);
1784  }
1785  }
1786 }
1787 
1788 /// Add the node to the set, and add all of its connected nodes to the set.
1789 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1790  SetVector<SUnit *> &NodesAdded) {
1791  NewSet.insert(SU);
1792  NodesAdded.insert(SU);
1793  for (auto &SI : SU->Succs) {
1794  SUnit *Successor = SI.getSUnit();
1795  if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
1796  NodesAdded.count(Successor) == 0)
1797  addConnectedNodes(Successor, NewSet, NodesAdded);
1798  }
1799  for (auto &PI : SU->Preds) {
1800  SUnit *Predecessor = PI.getSUnit();
1801  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1802  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1803  }
1804 }
1805 
1806 /// Return true if Set1 contains elements in Set2. The elements in common
1807 /// are returned in a different container.
1808 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1809  SmallSetVector<SUnit *, 8> &Result) {
1810  Result.clear();
1811  for (SUnit *SU : Set1) {
1812  if (Set2.count(SU) != 0)
1813  Result.insert(SU);
1814  }
1815  return !Result.empty();
1816 }
1817 
1818 /// Merge the recurrence node sets that have the same initial node.
1819 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1820  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1821  ++I) {
1822  NodeSet &NI = *I;
1823  for (NodeSetType::iterator J = I + 1; J != E;) {
1824  NodeSet &NJ = *J;
1825  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1826  if (NJ.compareRecMII(NI) > 0)
1827  NI.setRecMII(NJ.getRecMII());
1828  for (SUnit *SU : *J)
1829  I->insert(SU);
1830  NodeSets.erase(J);
1831  E = NodeSets.end();
1832  } else {
1833  ++J;
1834  }
1835  }
1836  }
1837 }
1838 
1839 /// Remove nodes that have been scheduled in previous NodeSets.
1840 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1841  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1842  ++I)
1843  for (NodeSetType::iterator J = I + 1; J != E;) {
1844  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1845 
1846  if (J->empty()) {
1847  NodeSets.erase(J);
1848  E = NodeSets.end();
1849  } else {
1850  ++J;
1851  }
1852  }
1853 }
1854 
1855 /// Compute an ordered list of the dependence graph nodes, which
1856 /// indicates the order that the nodes will be scheduled. This is a
1857 /// two-level algorithm. First, a partial order is created, which
1858 /// consists of a list of sets ordered from highest to lowest priority.
1859 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1861  NodeOrder.clear();
1862 
1863  for (auto &Nodes : NodeSets) {
1864  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1865  OrderKind Order;
1867  if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1868  R.insert(N.begin(), N.end());
1869  Order = BottomUp;
1870  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1871  } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1872  R.insert(N.begin(), N.end());
1873  Order = TopDown;
1874  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1875  } else if (isIntersect(N, Nodes, R)) {
1876  // If some of the successors are in the existing node-set, then use the
1877  // top-down ordering.
1878  Order = TopDown;
1879  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1880  } else if (NodeSets.size() == 1) {
1881  for (auto &N : Nodes)
1882  if (N->Succs.size() == 0)
1883  R.insert(N);
1884  Order = BottomUp;
1885  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1886  } else {
1887  // Find the node with the highest ASAP.
1888  SUnit *maxASAP = nullptr;
1889  for (SUnit *SU : Nodes) {
1890  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1891  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1892  maxASAP = SU;
1893  }
1894  R.insert(maxASAP);
1895  Order = BottomUp;
1896  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1897  }
1898 
1899  while (!R.empty()) {
1900  if (Order == TopDown) {
1901  // Choose the node with the maximum height. If more than one, choose
1902  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1903  // choose the node with the lowest MOV.
1904  while (!R.empty()) {
1905  SUnit *maxHeight = nullptr;
1906  for (SUnit *I : R) {
1907  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1908  maxHeight = I;
1909  else if (getHeight(I) == getHeight(maxHeight) &&
1911  maxHeight = I;
1912  else if (getHeight(I) == getHeight(maxHeight) &&
1914  getZeroLatencyHeight(maxHeight) &&
1915  getMOV(I) < getMOV(maxHeight))
1916  maxHeight = I;
1917  }
1918  NodeOrder.insert(maxHeight);
1919  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1920  R.remove(maxHeight);
1921  for (const auto &I : maxHeight->Succs) {
1922  if (Nodes.count(I.getSUnit()) == 0)
1923  continue;
1924  if (NodeOrder.contains(I.getSUnit()))
1925  continue;
1926  if (ignoreDependence(I, false))
1927  continue;
1928  R.insert(I.getSUnit());
1929  }
1930  // Back-edges are predecessors with an anti-dependence.
1931  for (const auto &I : maxHeight->Preds) {
1932  if (I.getKind() != SDep::Anti)
1933  continue;
1934  if (Nodes.count(I.getSUnit()) == 0)
1935  continue;
1936  if (NodeOrder.contains(I.getSUnit()))
1937  continue;
1938  R.insert(I.getSUnit());
1939  }
1940  }
1941  Order = BottomUp;
1942  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1944  if (pred_L(NodeOrder, N, &Nodes))
1945  R.insert(N.begin(), N.end());
1946  } else {
1947  // Choose the node with the maximum depth. If more than one, choose
1948  // the node with the maximum ZeroLatencyDepth. If still more than one,
1949  // choose the node with the lowest MOV.
1950  while (!R.empty()) {
1951  SUnit *maxDepth = nullptr;
1952  for (SUnit *I : R) {
1953  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1954  maxDepth = I;
1955  else if (getDepth(I) == getDepth(maxDepth) &&
1957  maxDepth = I;
1958  else if (getDepth(I) == getDepth(maxDepth) &&
1959  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1960  getMOV(I) < getMOV(maxDepth))
1961  maxDepth = I;
1962  }
1963  NodeOrder.insert(maxDepth);
1964  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1965  R.remove(maxDepth);
1966  if (Nodes.isExceedSU(maxDepth)) {
1967  Order = TopDown;
1968  R.clear();
1969  R.insert(Nodes.getNode(0));
1970  break;
1971  }
1972  for (const auto &I : maxDepth->Preds) {
1973  if (Nodes.count(I.getSUnit()) == 0)
1974  continue;
1975  if (NodeOrder.contains(I.getSUnit()))
1976  continue;
1977  R.insert(I.getSUnit());
1978  }
1979  // Back-edges are predecessors with an anti-dependence.
1980  for (const auto &I : maxDepth->Succs) {
1981  if (I.getKind() != SDep::Anti)
1982  continue;
1983  if (Nodes.count(I.getSUnit()) == 0)
1984  continue;
1985  if (NodeOrder.contains(I.getSUnit()))
1986  continue;
1987  R.insert(I.getSUnit());
1988  }
1989  }
1990  Order = TopDown;
1991  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
1993  if (succ_L(NodeOrder, N, &Nodes))
1994  R.insert(N.begin(), N.end());
1995  }
1996  }
1997  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
1998  }
1999 
2000  LLVM_DEBUG({
2001  dbgs() << "Node order: ";
2002  for (SUnit *I : NodeOrder)
2003  dbgs() << " " << I->NodeNum << " ";
2004  dbgs() << "\n";
2005  });
2006 }
2007 
2008 /// Process the nodes in the computed order and create the pipelined schedule
2009 /// of the instructions, if possible. Return true if a schedule is found.
2010 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2011 
2012  if (NodeOrder.empty()){
2013  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2014  return false;
2015  }
2016 
2017  bool scheduleFound = false;
2018  // Keep increasing II until a valid schedule is found.
2019  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2020  Schedule.reset();
2021  Schedule.setInitiationInterval(II);
2022  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2023 
2024  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2025  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2026  do {
2027  SUnit *SU = *NI;
2028 
2029  // Compute the schedule time for the instruction, which is based
2030  // upon the scheduled time for any predecessors/successors.
2031  int EarlyStart = INT_MIN;
2032  int LateStart = INT_MAX;
2033  // These values are set when the size of the schedule window is limited
2034  // due to chain dependences.
2035  int SchedEnd = INT_MAX;
2036  int SchedStart = INT_MIN;
2037  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2038  II, this);
2039  LLVM_DEBUG({
2040  dbgs() << "\n";
2041  dbgs() << "Inst (" << SU->NodeNum << ") ";
2042  SU->getInstr()->dump();
2043  dbgs() << "\n";
2044  });
2045  LLVM_DEBUG({
2046  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2047  LateStart, SchedEnd, SchedStart);
2048  });
2049 
2050  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2051  SchedStart > LateStart)
2052  scheduleFound = false;
2053  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2054  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2055  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2056  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2057  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2058  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2059  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2060  SchedEnd =
2061  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2062  // When scheduling a Phi it is better to start at the late cycle and go
2063  // backwards. The default order may insert the Phi too far away from
2064  // its first dependence.
2065  if (SU->getInstr()->isPHI())
2066  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2067  else
2068  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2069  } else {
2070  int FirstCycle = Schedule.getFirstCycle();
2071  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2072  FirstCycle + getASAP(SU) + II - 1, II);
2073  }
2074  // Even if we find a schedule, make sure the schedule doesn't exceed the
2075  // allowable number of stages. We keep trying if this happens.
2076  if (scheduleFound)
2077  if (SwpMaxStages > -1 &&
2078  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2079  scheduleFound = false;
2080 
2081  LLVM_DEBUG({
2082  if (!scheduleFound)
2083  dbgs() << "\tCan't schedule\n";
2084  });
2085  } while (++NI != NE && scheduleFound);
2086 
2087  // If a schedule is found, ensure non-pipelined instructions are in stage 0
2088  if (scheduleFound)
2089  scheduleFound =
2090  Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
2091 
2092  // If a schedule is found, check if it is a valid schedule too.
2093  if (scheduleFound)
2094  scheduleFound = Schedule.isValidSchedule(this);
2095  }
2096 
2097  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2098  << " (II=" << Schedule.getInitiationInterval()
2099  << ")\n");
2100 
2101  if (scheduleFound) {
2102  Schedule.finalizeSchedule(this);
2103  Pass.ORE->emit([&]() {
2105  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2106  << "Schedule found with Initiation Interval: "
2107  << ore::NV("II", Schedule.getInitiationInterval())
2108  << ", MaxStageCount: "
2109  << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
2110  });
2111  } else
2112  Schedule.reset();
2113 
2114  return scheduleFound && Schedule.getMaxStageCount() > 0;
2115 }
2116 
2117 /// Return true if we can compute the amount the instruction changes
2118 /// during each iteration. Set Delta to the amount of the change.
2119 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2121  const MachineOperand *BaseOp;
2122  int64_t Offset;
2123  bool OffsetIsScalable;
2124  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2125  return false;
2126 
2127  // FIXME: This algorithm assumes instructions have fixed-size offsets.
2128  if (OffsetIsScalable)
2129  return false;
2130 
2131  if (!BaseOp->isReg())
2132  return false;
2133 
2134  Register BaseReg = BaseOp->getReg();
2135 
2137  // Check if there is a Phi. If so, get the definition in the loop.
2138  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2139  if (BaseDef && BaseDef->isPHI()) {
2140  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2141  BaseDef = MRI.getVRegDef(BaseReg);
2142  }
2143  if (!BaseDef)
2144  return false;
2145 
2146  int D = 0;
2147  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2148  return false;
2149 
2150  Delta = D;
2151  return true;
2152 }
2153 
2154 /// Check if we can change the instruction to use an offset value from the
2155 /// previous iteration. If so, return true and set the base and offset values
2156 /// so that we can rewrite the load, if necessary.
2157 /// v1 = Phi(v0, v3)
2158 /// v2 = load v1, 0
2159 /// v3 = post_store v1, 4, x
2160 /// This function enables the load to be rewritten as v2 = load v3, 4.
2161 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2162  unsigned &BasePos,
2163  unsigned &OffsetPos,
2164  unsigned &NewBase,
2165  int64_t &Offset) {
2166  // Get the load instruction.
2167  if (TII->isPostIncrement(*MI))
2168  return false;
2169  unsigned BasePosLd, OffsetPosLd;
2170  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2171  return false;
2172  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2173 
2174  // Look for the Phi instruction.
2175  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2176  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2177  if (!Phi || !Phi->isPHI())
2178  return false;
2179  // Get the register defined in the loop block.
2180  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2181  if (!PrevReg)
2182  return false;
2183 
2184  // Check for the post-increment load/store instruction.
2185  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2186  if (!PrevDef || PrevDef == MI)
2187  return false;
2188 
2189  if (!TII->isPostIncrement(*PrevDef))
2190  return false;
2191 
2192  unsigned BasePos1 = 0, OffsetPos1 = 0;
2193  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2194  return false;
2195 
2196  // Make sure that the instructions do not access the same memory location in
2197  // the next iteration.
2198  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2199  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2200  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2201  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2202  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2203  MF.deleteMachineInstr(NewMI);
2204  if (!Disjoint)
2205  return false;
2206 
2207  // Set the return value once we determine that we return true.
2208  BasePos = BasePosLd;
2209  OffsetPos = OffsetPosLd;
2210  NewBase = PrevReg;
2211  Offset = StoreOffset;
2212  return true;
2213 }
2214 
2215 /// Apply changes to the instruction if needed. The changes are need
2216 /// to improve the scheduling and depend up on the final schedule.
2218  SMSchedule &Schedule) {
2219  SUnit *SU = getSUnit(MI);
2221  InstrChanges.find(SU);
2222  if (It != InstrChanges.end()) {
2223  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2224  unsigned BasePos, OffsetPos;
2225  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2226  return;
2227  Register BaseReg = MI->getOperand(BasePos).getReg();
2228  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2229  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2230  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2231  int BaseStageNum = Schedule.stageScheduled(SU);
2232  int BaseCycleNum = Schedule.cycleScheduled(SU);
2233  if (BaseStageNum < DefStageNum) {
2234  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2235  int OffsetDiff = DefStageNum - BaseStageNum;
2236  if (DefCycleNum < BaseCycleNum) {
2237  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2238  if (OffsetDiff > 0)
2239  --OffsetDiff;
2240  }
2241  int64_t NewOffset =
2242  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2243  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2244  SU->setInstr(NewMI);
2245  MISUnitMap[NewMI] = SU;
2246  NewMIs[MI] = NewMI;
2247  }
2248  }
2249 }
2250 
2251 /// Return the instruction in the loop that defines the register.
2252 /// If the definition is a Phi, then follow the Phi operand to
2253 /// the instruction in the loop.
2254 MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2257  while (Def->isPHI()) {
2258  if (!Visited.insert(Def).second)
2259  break;
2260  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2261  if (Def->getOperand(i + 1).getMBB() == BB) {
2262  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2263  break;
2264  }
2265  }
2266  return Def;
2267 }
2268 
2269 /// Return true for an order or output dependence that is loop carried
2270 /// potentially. A dependence is loop carried if the destination defines a valu
2271 /// that may be used or defined by the source in a subsequent iteration.
2273  bool isSucc) {
2274  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2275  Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
2276  return false;
2277 
2278  if (!SwpPruneLoopCarried)
2279  return true;
2280 
2281  if (Dep.getKind() == SDep::Output)
2282  return true;
2283 
2284  MachineInstr *SI = Source->getInstr();
2285  MachineInstr *DI = Dep.getSUnit()->getInstr();
2286  if (!isSucc)
2287  std::swap(SI, DI);
2288  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2289 
2290  // Assume ordered loads and stores may have a loop carried dependence.
2291  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2292  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2293  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2294  return true;
2295 
2296  // Only chain dependences between a load and store can be loop carried.
2297  if (!DI->mayStore() || !SI->mayLoad())
2298  return false;
2299 
2300  unsigned DeltaS, DeltaD;
2301  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2302  return true;
2303 
2304  const MachineOperand *BaseOpS, *BaseOpD;
2305  int64_t OffsetS, OffsetD;
2306  bool OffsetSIsScalable, OffsetDIsScalable;
2308  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2309  TRI) ||
2310  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2311  TRI))
2312  return true;
2313 
2314  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2315  "Expected offsets to be byte offsets");
2316 
2317  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2318  return true;
2319 
2320  // Check that the base register is incremented by a constant value for each
2321  // iteration.
2322  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2323  if (!Def || !Def->isPHI())
2324  return true;
2325  unsigned InitVal = 0;
2326  unsigned LoopVal = 0;
2327  getPhiRegs(*Def, BB, InitVal, LoopVal);
2328  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2329  int D = 0;
2330  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2331  return true;
2332 
2333  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2334  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2335 
2336  // This is the main test, which checks the offset values and the loop
2337  // increment value to determine if the accesses may be loop carried.
2338  if (AccessSizeS == MemoryLocation::UnknownSize ||
2339  AccessSizeD == MemoryLocation::UnknownSize)
2340  return true;
2341 
2342  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2343  return true;
2344 
2345  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2346 }
2347 
2348 void SwingSchedulerDAG::postprocessDAG() {
2349  for (auto &M : Mutations)
2350  M->apply(this);
2351 }
2352 
2353 /// Try to schedule the node at the specified StartCycle and continue
2354 /// until the node is schedule or the EndCycle is reached. This function
2355 /// returns true if the node is scheduled. This routine may search either
2356 /// forward or backward for a place to insert the instruction based upon
2357 /// the relative values of StartCycle and EndCycle.
2358 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2359  bool forward = true;
2360  LLVM_DEBUG({
2361  dbgs() << "Trying to insert node between " << StartCycle << " and "
2362  << EndCycle << " II: " << II << "\n";
2363  });
2364  if (StartCycle > EndCycle)
2365  forward = false;
2366 
2367  // The terminating condition depends on the direction.
2368  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2369  for (int curCycle = StartCycle; curCycle != termCycle;
2370  forward ? ++curCycle : --curCycle) {
2371 
2372  // Add the already scheduled instructions at the specified cycle to the
2373  // DFA.
2374  ProcItinResources.clearResources();
2375  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2376  checkCycle <= LastCycle; checkCycle += II) {
2377  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2378 
2379  for (SUnit *CI : cycleInstrs) {
2380  if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
2381  continue;
2382  assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
2383  "These instructions have already been scheduled.");
2384  ProcItinResources.reserveResources(*CI->getInstr());
2385  }
2386  }
2387  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2388  ProcItinResources.canReserveResources(*SU->getInstr())) {
2389  LLVM_DEBUG({
2390  dbgs() << "\tinsert at cycle " << curCycle << " ";
2391  SU->getInstr()->dump();
2392  });
2393 
2394  ScheduledInstrs[curCycle].push_back(SU);
2395  InstrToCycle.insert(std::make_pair(SU, curCycle));
2396  if (curCycle > LastCycle)
2397  LastCycle = curCycle;
2398  if (curCycle < FirstCycle)
2399  FirstCycle = curCycle;
2400  return true;
2401  }
2402  LLVM_DEBUG({
2403  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2404  SU->getInstr()->dump();
2405  });
2406  }
2407  return false;
2408 }
2409 
2410 // Return the cycle of the earliest scheduled instruction in the chain.
2412  SmallPtrSet<SUnit *, 8> Visited;
2413  SmallVector<SDep, 8> Worklist;
2414  Worklist.push_back(Dep);
2415  int EarlyCycle = INT_MAX;
2416  while (!Worklist.empty()) {
2417  const SDep &Cur = Worklist.pop_back_val();
2418  SUnit *PrevSU = Cur.getSUnit();
2419  if (Visited.count(PrevSU))
2420  continue;
2421  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2422  if (it == InstrToCycle.end())
2423  continue;
2424  EarlyCycle = std::min(EarlyCycle, it->second);
2425  for (const auto &PI : PrevSU->Preds)
2426  if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
2427  Worklist.push_back(PI);
2428  Visited.insert(PrevSU);
2429  }
2430  return EarlyCycle;
2431 }
2432 
2433 // Return the cycle of the latest scheduled instruction in the chain.
2435  SmallPtrSet<SUnit *, 8> Visited;
2436  SmallVector<SDep, 8> Worklist;
2437  Worklist.push_back(Dep);
2438  int LateCycle = INT_MIN;
2439  while (!Worklist.empty()) {
2440  const SDep &Cur = Worklist.pop_back_val();
2441  SUnit *SuccSU = Cur.getSUnit();
2442  if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
2443  continue;
2444  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2445  if (it == InstrToCycle.end())
2446  continue;
2447  LateCycle = std::max(LateCycle, it->second);
2448  for (const auto &SI : SuccSU->Succs)
2449  if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
2450  Worklist.push_back(SI);
2451  Visited.insert(SuccSU);
2452  }
2453  return LateCycle;
2454 }
2455 
2456 /// If an instruction has a use that spans multiple iterations, then
2457 /// return true. These instructions are characterized by having a back-ege
2458 /// to a Phi, which contains a reference to another Phi.
2460  for (auto &P : SU->Preds)
2461  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2462  for (auto &S : P.getSUnit()->Succs)
2463  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2464  return P.getSUnit();
2465  return nullptr;
2466 }
2467 
2468 /// Compute the scheduling start slot for the instruction. The start slot
2469 /// depends on any predecessor or successor nodes scheduled already.
2470 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2471  int *MinEnd, int *MaxStart, int II,
2472  SwingSchedulerDAG *DAG) {
2473  // Iterate over each instruction that has been scheduled already. The start
2474  // slot computation depends on whether the previously scheduled instruction
2475  // is a predecessor or successor of the specified instruction.
2476  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2477 
2478  // Iterate over each instruction in the current cycle.
2479  for (SUnit *I : getInstructions(cycle)) {
2480  // Because we're processing a DAG for the dependences, we recognize
2481  // the back-edge in recurrences by anti dependences.
2482  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2483  const SDep &Dep = SU->Preds[i];
2484  if (Dep.getSUnit() == I) {
2485  if (!DAG->isBackedge(SU, Dep)) {
2486  int EarlyStart = cycle + Dep.getLatency() -
2487  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2488  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2489  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2490  int End = earliestCycleInChain(Dep) + (II - 1);
2491  *MinEnd = std::min(*MinEnd, End);
2492  }
2493  } else {
2494  int LateStart = cycle - Dep.getLatency() +
2495  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2496  *MinLateStart = std::min(*MinLateStart, LateStart);
2497  }
2498  }
2499  // For instruction that requires multiple iterations, make sure that
2500  // the dependent instruction is not scheduled past the definition.
2501  SUnit *BE = multipleIterations(I, DAG);
2502  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2503  !SU->isPred(I))
2504  *MinLateStart = std::min(*MinLateStart, cycle);
2505  }
2506  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2507  if (SU->Succs[i].getSUnit() == I) {
2508  const SDep &Dep = SU->Succs[i];
2509  if (!DAG->isBackedge(SU, Dep)) {
2510  int LateStart = cycle - Dep.getLatency() +
2511  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2512  *MinLateStart = std::min(*MinLateStart, LateStart);
2513  if (DAG->isLoopCarriedDep(SU, Dep)) {
2514  int Start = latestCycleInChain(Dep) + 1 - II;
2515  *MaxStart = std::max(*MaxStart, Start);
2516  }
2517  } else {
2518  int EarlyStart = cycle + Dep.getLatency() -
2519  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2520  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2521  }
2522  }
2523  }
2524  }
2525  }
2526 }
2527 
2528 /// Order the instructions within a cycle so that the definitions occur
2529 /// before the uses. Returns true if the instruction is added to the start
2530 /// of the list, or false if added to the end.
2532  std::deque<SUnit *> &Insts) {
2533  MachineInstr *MI = SU->getInstr();
2534  bool OrderBeforeUse = false;
2535  bool OrderAfterDef = false;
2536  bool OrderBeforeDef = false;
2537  unsigned MoveDef = 0;
2538  unsigned MoveUse = 0;
2539  int StageInst1 = stageScheduled(SU);
2540 
2541  unsigned Pos = 0;
2542  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2543  ++I, ++Pos) {
2544  for (MachineOperand &MO : MI->operands()) {
2545  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2546  continue;
2547 
2548  Register Reg = MO.getReg();
2549  unsigned BasePos, OffsetPos;
2550  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2551  if (MI->getOperand(BasePos).getReg() == Reg)
2552  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2553  Reg = NewReg;
2554  bool Reads, Writes;
2555  std::tie(Reads, Writes) =
2556  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2557  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2558  OrderBeforeUse = true;
2559  if (MoveUse == 0)
2560  MoveUse = Pos;
2561  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2562  // Add the instruction after the scheduled instruction.
2563  OrderAfterDef = true;
2564  MoveDef = Pos;
2565  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2566  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2567  OrderBeforeUse = true;
2568  if (MoveUse == 0)
2569  MoveUse = Pos;
2570  } else {
2571  OrderAfterDef = true;
2572  MoveDef = Pos;
2573  }
2574  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2575  OrderBeforeUse = true;
2576  if (MoveUse == 0)
2577  MoveUse = Pos;
2578  if (MoveUse != 0) {
2579  OrderAfterDef = true;
2580  MoveDef = Pos - 1;
2581  }
2582  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2583  // Add the instruction before the scheduled instruction.
2584  OrderBeforeUse = true;
2585  if (MoveUse == 0)
2586  MoveUse = Pos;
2587  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2588  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2589  if (MoveUse == 0) {
2590  OrderBeforeDef = true;
2591  MoveUse = Pos;
2592  }
2593  }
2594  }
2595  // Check for order dependences between instructions. Make sure the source
2596  // is ordered before the destination.
2597  for (auto &S : SU->Succs) {
2598  if (S.getSUnit() != *I)
2599  continue;
2600  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2601  OrderBeforeUse = true;
2602  if (Pos < MoveUse)
2603  MoveUse = Pos;
2604  }
2605  // We did not handle HW dependences in previous for loop,
2606  // and we normally set Latency = 0 for Anti deps,
2607  // so may have nodes in same cycle with Anti denpendent on HW regs.
2608  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2609  OrderBeforeUse = true;
2610  if ((MoveUse == 0) || (Pos < MoveUse))
2611  MoveUse = Pos;
2612  }
2613  }
2614  for (auto &P : SU->Preds) {
2615  if (P.getSUnit() != *I)
2616  continue;
2617  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2618  OrderAfterDef = true;
2619  MoveDef = Pos;
2620  }
2621  }
2622  }
2623 
2624  // A circular dependence.
2625  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2626  OrderBeforeUse = false;
2627 
2628  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2629  // to a loop-carried dependence.
2630  if (OrderBeforeDef)
2631  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2632 
2633  // The uncommon case when the instruction order needs to be updated because
2634  // there is both a use and def.
2635  if (OrderBeforeUse && OrderAfterDef) {
2636  SUnit *UseSU = Insts.at(MoveUse);
2637  SUnit *DefSU = Insts.at(MoveDef);
2638  if (MoveUse > MoveDef) {
2639  Insts.erase(Insts.begin() + MoveUse);
2640  Insts.erase(Insts.begin() + MoveDef);
2641  } else {
2642  Insts.erase(Insts.begin() + MoveDef);
2643  Insts.erase(Insts.begin() + MoveUse);
2644  }
2645  orderDependence(SSD, UseSU, Insts);
2646  orderDependence(SSD, SU, Insts);
2647  orderDependence(SSD, DefSU, Insts);
2648  return;
2649  }
2650  // Put the new instruction first if there is a use in the list. Otherwise,
2651  // put it at the end of the list.
2652  if (OrderBeforeUse)
2653  Insts.push_front(SU);
2654  else
2655  Insts.push_back(SU);
2656 }
2657 
2658 /// Return true if the scheduled Phi has a loop carried operand.
2660  if (!Phi.isPHI())
2661  return false;
2662  assert(Phi.isPHI() && "Expecting a Phi.");
2663  SUnit *DefSU = SSD->getSUnit(&Phi);
2664  unsigned DefCycle = cycleScheduled(DefSU);
2665  int DefStage = stageScheduled(DefSU);
2666 
2667  unsigned InitVal = 0;
2668  unsigned LoopVal = 0;
2669  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2670  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2671  if (!UseSU)
2672  return true;
2673  if (UseSU->getInstr()->isPHI())
2674  return true;
2675  unsigned LoopCycle = cycleScheduled(UseSU);
2676  int LoopStage = stageScheduled(UseSU);
2677  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2678 }
2679 
2680 /// Return true if the instruction is a definition that is loop carried
2681 /// and defines the use on the next iteration.
2682 /// v1 = phi(v2, v3)
2683 /// (Def) v3 = op v1
2684 /// (MO) = v1
2685 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2686 /// register.
2689  if (!MO.isReg())
2690  return false;
2691  if (Def->isPHI())
2692  return false;
2693  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2694  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2695  return false;
2696  if (!isLoopCarried(SSD, *Phi))
2697  return false;
2698  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2699  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2700  MachineOperand &DMO = Def->getOperand(i);
2701  if (!DMO.isReg() || !DMO.isDef())
2702  continue;
2703  if (DMO.getReg() == LoopReg)
2704  return true;
2705  }
2706  return false;
2707 }
2708 
2709 /// Determine transitive dependences of unpipelineable instructions
2712  SmallSet<SUnit *, 8> DoNotPipeline;
2713  SmallVector<SUnit *, 8> Worklist;
2714 
2715  for (auto &SU : SSD->SUnits)
2716  if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
2717  Worklist.push_back(&SU);
2718 
2719  while (!Worklist.empty()) {
2720  auto SU = Worklist.pop_back_val();
2721  if (DoNotPipeline.count(SU))
2722  continue;
2723  LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
2724  DoNotPipeline.insert(SU);
2725  for (auto &Dep : SU->Preds)
2726  Worklist.push_back(Dep.getSUnit());
2727  if (SU->getInstr()->isPHI())
2728  for (auto &Dep : SU->Succs)
2729  if (Dep.getKind() == SDep::Anti)
2730  Worklist.push_back(Dep.getSUnit());
2731  }
2732  return DoNotPipeline;
2733 }
2734 
2735 // Determine all instructions upon which any unpipelineable instruction depends
2736 // and ensure that they are in stage 0. If unable to do so, return false.
2740 
2741  int NewLastCycle = INT_MIN;
2742  for (SUnit &SU : SSD->SUnits) {
2743  if (!SU.isInstr())
2744  continue;
2745  if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
2746  NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
2747  continue;
2748  }
2749 
2750  // Put the non-pipelined instruction as early as possible in the schedule
2751  int NewCycle = getFirstCycle();
2752  for (auto &Dep : SU.Preds)
2753  NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
2754 
2755  int OldCycle = InstrToCycle[&SU];
2756  if (OldCycle != NewCycle) {
2757  InstrToCycle[&SU] = NewCycle;
2758  auto &OldS = getInstructions(OldCycle);
2759  llvm::erase_value(OldS, &SU);
2760  getInstructions(NewCycle).emplace_back(&SU);
2761  LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
2762  << ") is not pipelined; moving from cycle " << OldCycle
2763  << " to " << NewCycle << " Instr:" << *SU.getInstr());
2764  }
2765  NewLastCycle = std::max(NewLastCycle, NewCycle);
2766  }
2767  LastCycle = NewLastCycle;
2768  return true;
2769 }
2770 
2771 // Check if the generated schedule is valid. This function checks if
2772 // an instruction that uses a physical register is scheduled in a
2773 // different stage than the definition. The pipeliner does not handle
2774 // physical register values that may cross a basic block boundary.
2775 // Furthermore, if a physical def/use pair is assigned to the same
2776 // cycle, orderDependence does not guarantee def/use ordering, so that
2777 // case should be considered invalid. (The test checks for both
2778 // earlier and same-cycle use to be more robust.)
2780  for (SUnit &SU : SSD->SUnits) {
2781  if (!SU.hasPhysRegDefs)
2782  continue;
2783  int StageDef = stageScheduled(&SU);
2784  int CycleDef = InstrToCycle[&SU];
2785  assert(StageDef != -1 && "Instruction should have been scheduled.");
2786  for (auto &SI : SU.Succs)
2787  if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
2788  if (Register::isPhysicalRegister(SI.getReg())) {
2789  if (stageScheduled(SI.getSUnit()) != StageDef)
2790  return false;
2791  if (InstrToCycle[SI.getSUnit()] <= CycleDef)
2792  return false;
2793  }
2794  }
2795  return true;
2796 }
2797 
2798 /// A property of the node order in swing-modulo-scheduling is
2799 /// that for nodes outside circuits the following holds:
2800 /// none of them is scheduled after both a successor and a
2801 /// predecessor.
2802 /// The method below checks whether the property is met.
2803 /// If not, debug information is printed and statistics information updated.
2804 /// Note that we do not use an assert statement.
2805 /// The reason is that although an invalid node oder may prevent
2806 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2807 /// it does not lead to the generation of incorrect code.
2808 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2809 
2810  // a sorted vector that maps each SUnit to its index in the NodeOrder
2811  typedef std::pair<SUnit *, unsigned> UnitIndex;
2812  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2813 
2814  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2815  Indices.push_back(std::make_pair(NodeOrder[i], i));
2816 
2817  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2818  return std::get<0>(i1) < std::get<0>(i2);
2819  };
2820 
2821  // sort, so that we can perform a binary search
2822  llvm::sort(Indices, CompareKey);
2823 
2824  bool Valid = true;
2825  (void)Valid;
2826  // for each SUnit in the NodeOrder, check whether
2827  // it appears after both a successor and a predecessor
2828  // of the SUnit. If this is the case, and the SUnit
2829  // is not part of circuit, then the NodeOrder is not
2830  // valid.
2831  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2832  SUnit *SU = NodeOrder[i];
2833  unsigned Index = i;
2834 
2835  bool PredBefore = false;
2836  bool SuccBefore = false;
2837 
2838  SUnit *Succ;
2839  SUnit *Pred;
2840  (void)Succ;
2841  (void)Pred;
2842 
2843  for (SDep &PredEdge : SU->Preds) {
2844  SUnit *PredSU = PredEdge.getSUnit();
2845  unsigned PredIndex = std::get<1>(
2846  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2847  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2848  PredBefore = true;
2849  Pred = PredSU;
2850  break;
2851  }
2852  }
2853 
2854  for (SDep &SuccEdge : SU->Succs) {
2855  SUnit *SuccSU = SuccEdge.getSUnit();
2856  // Do not process a boundary node, it was not included in NodeOrder,
2857  // hence not in Indices either, call to std::lower_bound() below will
2858  // return Indices.end().
2859  if (SuccSU->isBoundaryNode())
2860  continue;
2861  unsigned SuccIndex = std::get<1>(
2862  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2863  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2864  SuccBefore = true;
2865  Succ = SuccSU;
2866  break;
2867  }
2868  }
2869 
2870  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2871  // instructions in circuits are allowed to be scheduled
2872  // after both a successor and predecessor.
2873  bool InCircuit = llvm::any_of(
2874  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2875  if (InCircuit)
2876  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2877  else {
2878  Valid = false;
2879  NumNodeOrderIssues++;
2880  LLVM_DEBUG(dbgs() << "Predecessor ";);
2881  }
2882  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2883  << " are scheduled before node " << SU->NodeNum
2884  << "\n";);
2885  }
2886  }
2887 
2888  LLVM_DEBUG({
2889  if (!Valid)
2890  dbgs() << "Invalid node order found!\n";
2891  });
2892 }
2893 
2894 /// Attempt to fix the degenerate cases when the instruction serialization
2895 /// causes the register lifetimes to overlap. For example,
2896 /// p' = store_pi(p, b)
2897 /// = load p, offset
2898 /// In this case p and p' overlap, which means that two registers are needed.
2899 /// Instead, this function changes the load to use p' and updates the offset.
2900 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2901  unsigned OverlapReg = 0;
2902  unsigned NewBaseReg = 0;
2903  for (SUnit *SU : Instrs) {
2904  MachineInstr *MI = SU->getInstr();
2905  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2906  const MachineOperand &MO = MI->getOperand(i);
2907  // Look for an instruction that uses p. The instruction occurs in the
2908  // same cycle but occurs later in the serialized order.
2909  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2910  // Check that the instruction appears in the InstrChanges structure,
2911  // which contains instructions that can have the offset updated.
2913  InstrChanges.find(SU);
2914  if (It != InstrChanges.end()) {
2915  unsigned BasePos, OffsetPos;
2916  // Update the base register and adjust the offset.
2917  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2918  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2919  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2920  int64_t NewOffset =
2921  MI->getOperand(OffsetPos).getImm() - It->second.second;
2922  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2923  SU->setInstr(NewMI);
2924  MISUnitMap[NewMI] = SU;
2925  NewMIs[MI] = NewMI;
2926  }
2927  }
2928  OverlapReg = 0;
2929  NewBaseReg = 0;
2930  break;
2931  }
2932  // Look for an instruction of the form p' = op(p), which uses and defines
2933  // two virtual registers that get allocated to the same physical register.
2934  unsigned TiedUseIdx = 0;
2935  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2936  // OverlapReg is p in the example above.
2937  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2938  // NewBaseReg is p' in the example above.
2939  NewBaseReg = MI->getOperand(i).getReg();
2940  break;
2941  }
2942  }
2943  }
2944 }
2945 
2946 /// After the schedule has been formed, call this function to combine
2947 /// the instructions from the different stages/cycles. That is, this
2948 /// function creates a schedule that represents a single iteration.
2950  // Move all instructions to the first stage from later stages.
2951  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2952  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2953  ++stage) {
2954  std::deque<SUnit *> &cycleInstrs =
2955  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2956  for (SUnit *SU : llvm::reverse(cycleInstrs))
2957  ScheduledInstrs[cycle].push_front(SU);
2958  }
2959  }
2960 
2961  // Erase all the elements in the later stages. Only one iteration should
2962  // remain in the scheduled list, and it contains all the instructions.
2963  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2964  ScheduledInstrs.erase(cycle);
2965 
2966  // Change the registers in instruction as specified in the InstrChanges
2967  // map. We need to use the new registers to create the correct order.
2968  for (const SUnit &SU : SSD->SUnits)
2969  SSD->applyInstrChange(SU.getInstr(), *this);
2970 
2971  // Reorder the instructions in each cycle to fix and improve the
2972  // generated code.
2973  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2974  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2975  std::deque<SUnit *> newOrderPhi;
2976  for (SUnit *SU : cycleInstrs) {
2977  if (SU->getInstr()->isPHI())
2978  newOrderPhi.push_back(SU);
2979  }
2980  std::deque<SUnit *> newOrderI;
2981  for (SUnit *SU : cycleInstrs) {
2982  if (!SU->getInstr()->isPHI())
2983  orderDependence(SSD, SU, newOrderI);
2984  }
2985  // Replace the old order with the new order.
2986  cycleInstrs.swap(newOrderPhi);
2987  llvm::append_range(cycleInstrs, newOrderI);
2988  SSD->fixupRegisterOverlaps(cycleInstrs);
2989  }
2990 
2991  LLVM_DEBUG(dump(););
2992 }
2993 
2994 void NodeSet::print(raw_ostream &os) const {
2995  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2996  << " depth " << MaxDepth << " col " << Colocate << "\n";
2997  for (const auto &I : Nodes)
2998  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2999  os << "\n";
3000 }
3001 
3002 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3003 /// Print the schedule information to the given output.
3005  // Iterate over each cycle.
3006  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
3007  // Iterate over each instruction in the cycle.
3008  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
3009  for (SUnit *CI : cycleInstrs->second) {
3010  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
3011  os << "(" << CI->NodeNum << ") ";
3012  CI->getInstr()->print(os);
3013  os << "\n";
3014  }
3015  }
3016 }
3017 
3018 /// Utility function used for debugging to print the schedule.
3021 
3022 #endif
3023 
3025  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
3026  unsigned ProcResourceID = 0;
3027 
3028  // We currently limit the resource kinds to 64 and below so that we can use
3029  // uint64_t for Masks
3030  assert(SM.getNumProcResourceKinds() < 64 &&
3031  "Too many kinds of resources, unsupported");
3032  // Create a unique bitmask for every processor resource unit.
3033  // Skip resource at index 0, since it always references 'InvalidUnit'.
3034  Masks.resize(SM.getNumProcResourceKinds());
3035  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3036  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3037  if (Desc.SubUnitsIdxBegin)
3038  continue;
3039  Masks[I] = 1ULL << ProcResourceID;
3040  ProcResourceID++;
3041  }
3042  // Create a unique bitmask for every processor resource group.
3043  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3044  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
3045  if (!Desc.SubUnitsIdxBegin)
3046  continue;
3047  Masks[I] = 1ULL << ProcResourceID;
3048  for (unsigned U = 0; U < Desc.NumUnits; ++U)
3049  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
3050  ProcResourceID++;
3051  }
3052  LLVM_DEBUG({
3053  if (SwpShowResMask) {
3054  dbgs() << "ProcResourceDesc:\n";
3055  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
3056  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
3057  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
3058  ProcResource->Name, I, Masks[I],
3059  ProcResource->NumUnits);
3060  }
3061  dbgs() << " -----------------\n";
3062  }
3063  });
3064 }
3065 
3067 
3068  LLVM_DEBUG({
3069  if (SwpDebugResource)
3070  dbgs() << "canReserveResources:\n";
3071  });
3072  if (UseDFA)
3073  return DFAResources->canReserveResources(MID);
3074 
3075  unsigned InsnClass = MID->getSchedClass();
3076  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3077  if (!SCDesc->isValid()) {
3078  LLVM_DEBUG({
3079  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3080  dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
3081  });
3082  return true;
3083  }
3084 
3085  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
3086  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
3087  for (; I != E; ++I) {
3088  if (!I->Cycles)
3089  continue;
3090  const MCProcResourceDesc *ProcResource =
3091  SM.getProcResource(I->ProcResourceIdx);
3092  unsigned NumUnits = ProcResource->NumUnits;
3093  LLVM_DEBUG({
3094  if (SwpDebugResource)
3095  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3096  ProcResource->Name, I->ProcResourceIdx,
3097  ProcResourceCount[I->ProcResourceIdx], NumUnits,
3098  I->Cycles);
3099  });
3100  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
3101  return false;
3102  }
3103  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
3104  return true;
3105 }
3106 
3108  LLVM_DEBUG({
3109  if (SwpDebugResource)
3110  dbgs() << "reserveResources:\n";
3111  });
3112  if (UseDFA)
3113  return DFAResources->reserveResources(MID);
3114 
3115  unsigned InsnClass = MID->getSchedClass();
3116  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3117  if (!SCDesc->isValid()) {
3118  LLVM_DEBUG({
3119  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3120  dbgs() << "isPseudo:" << MID->isPseudo() << "\n";
3121  });
3122  return;
3123  }
3124  for (const MCWriteProcResEntry &PRE :
3125  make_range(STI->getWriteProcResBegin(SCDesc),
3126  STI->getWriteProcResEnd(SCDesc))) {
3127  if (!PRE.Cycles)
3128  continue;
3129  ++ProcResourceCount[PRE.ProcResourceIdx];
3130  LLVM_DEBUG({
3131  if (SwpDebugResource) {
3132  const MCProcResourceDesc *ProcResource =
3133  SM.getProcResource(PRE.ProcResourceIdx);
3134  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3135  ProcResource->Name, PRE.ProcResourceIdx,
3136  ProcResourceCount[PRE.ProcResourceIdx],
3137  ProcResource->NumUnits, PRE.Cycles);
3138  }
3139  });
3140  }
3141  LLVM_DEBUG({
3142  if (SwpDebugResource)
3143  dbgs() << "reserveResources: done!\n\n";
3144  });
3145 }
3146 
3148  return canReserveResources(&MI.getDesc());
3149 }
3150 
3152  return reserveResources(&MI.getDesc());
3153 }
3154 
3156  if (UseDFA)
3157  return DFAResources->clearResources();
3158  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3159 }
i
i
Definition: README.txt:29
llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:112
computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1595
llvm::cl::Option::getPosition
unsigned getPosition() const
Definition: CommandLine.h:298
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:171
isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:1808
llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
Definition: ScheduleDAG.cpp:566
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1070
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:494
MathExtras.h
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230
it
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in it
Definition: README-SSE.txt:81
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:103
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
Definition: SetOperations.h:72
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:379
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:83
llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:238
llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:651
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:205
llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:2994
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:156
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:176
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:3020
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:79
SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MCInstrDesc.h
SetOperations.h
llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:66
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:163
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:199
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:720
llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:709
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1736
llvm::MCInstrDesc::isPseudo
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:266
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:182
llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:380
llvm::MapVector::clear
void clear()
Definition: MapVector.h:89
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:241
llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:723
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:618
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:226
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: MachinePipeliner.cpp:678
llvm::SMSchedule::normalizeNonPipelinedInstructions
bool normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Definition: MachinePipeliner.cpp:2737
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
MapVector.h
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:407
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:472
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:277
ValueTracking.h
OptimizationRemarkEmitter.h
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:33
llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:67
MachineBasicBlock.h
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:304
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:664
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1373
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
llvm::DenseMapIterator
Definition: DenseMap.h:57
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:357
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:488
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1390
DenseMap.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:380
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::RegisterMaskPair
Definition: RegisterPressure.h:38
llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2272
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2459
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1424
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1292
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:242
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:368
llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition: MachinePipeliner.cpp:2358
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:280
llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:84
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:67
computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1566
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:257
SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:140
EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:201
llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:141
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:103
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:127
RegisterPressure.h
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1329
MachineRegisterInfo.h
llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:585
llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1210
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:90
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1600
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:57
llvm::TargetInstrInfo::PipelinerLoopInfo
Object returned by analyzeLoopForPipelining.
Definition: TargetInstrInfo.h:728
CommandLine.h
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:666
MCInstrItineraries.h
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:37
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1280
MachineLoopInfo.h
llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:3019
llvm::AAResults
Definition: AliasAnalysis.h:511
llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:259
llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:249
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:236
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:210
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:369
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:501
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:480
llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:226
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:83
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:546
DFAPacketizer.h
llvm::MachineFunction::deleteMachineInstr
void deleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:418
llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:543
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
false
Definition: StackSlotColoring.cpp:141
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
SDAG
QP Compare Ordered outs ins xscmpudp No SDAG
Definition: README_P9.txt:301
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:187
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:548
llvm::GenericCycle
A possibly irreducible generalization of a Loop.
Definition: GenericCycleInfo.h:48
llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:536
llvm::Instruction
Definition: Instruction.h:42
llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:2779
SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:109
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:619
llvm::SMSchedule::orderDependence
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2531
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1777
BitVector.h
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:313
isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:709
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:527
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:239
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:75
llvm::pdb::PDB_ColorItem::Path
@ Path
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:320
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:396
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:374
llvm::erase_value
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:1815
llvm::SMSchedule::isLoopCarried
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:2659
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:279
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:96
llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:64
llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:534
llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:28
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:769
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::ResourceManager::canReserveResources
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
Definition: MachinePipeliner.cpp:3066
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1362
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1204
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:656
llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2470
llvm::cl::opt< bool >
llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431
llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:528
llvm::MachineLoop
Definition: MachineLoopInfo.h:44
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4500
llvm::MachinePipeliner::LoopInfo::LoopPipelinerInfo
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopPipelinerInfo
Definition: MachinePipeliner.h:87
llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:31
llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:446
llvm::AMDGPU::Hwreg::Offset
Offset
Definition: SIDefines.h:413
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:148
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:48
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2184
uint64_t
LiveIntervals.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:539
llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:83
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:408
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:383
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:485
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:382
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:716
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:112
llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:67
I
#define I(x, y, z)
Definition: MD5.cpp:58
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Definition: MachinePipeliner.h:222
llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:366
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
MCRegisterInfo.h
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:196
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:166
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:124
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:403
Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:191
ArrayRef.h
SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::desc("Ignore RecMII"))
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:943
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:86
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:49
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:662
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:248
llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:85
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:2687
isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:686
llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:372
iterator_range.h
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1256
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:114
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:264
llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:593
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1491
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1598
ScheduleDAGMutation.h
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:238
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:95
llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:233
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:167
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:376
SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:72
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1624
llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:92
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:491
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1985
llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:219
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1823
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:53
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1528
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:32
llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1162
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:288
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:135
llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:362
llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:2949
SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:158
llvm::ResourceManager::reserveResources
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change.
Definition: MachinePipeliner.cpp:3107
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:364
SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
llvm::MapVector::end
iterator end()
Definition: MapVector.h:72
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:98
llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:58
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:238
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1284
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1761
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:974
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1432
llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:3024
llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:229
swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1192
SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:35
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:396
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1025
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:638
llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:71
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1410
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::SMSchedule::computeUnpipelineableNodes
SmallSet< SUnit *, 8 > computeUnpipelineableNodes(SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI)
Determine transitive dependences of unpipelineable instructions.
Definition: MachinePipeliner.cpp:2710
llvm::MachineLoop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: MachineLoopInfo.cpp:101
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1562
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::ResourceManager::clearResources
void clearResources()
Reset the state.
Definition: MachinePipeliner.cpp:3155
llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:438
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1799
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:591
pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1510
llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:3004
llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2217
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
AA
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:73
llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2411
llvm::Cycle
CycleInfo::CycleT Cycle
Definition: CycleAnalysis.h:28
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:680
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:215
llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:77
llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1154
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:278
llvm::rdf::NodeSet
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
MachineInstrBuilder.h
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:178
succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1539
llvm::SUnit::isInstr
bool isInstr() const
Returns true if this SUnit refers to a machine instruction as opposed to an SDNode.
Definition: ScheduleDAG.h:362
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:270
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:558
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:92
N
#define N
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:53
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:104
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1351
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:494
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:82
llvm::TargetInstrInfo::PipelinerLoopInfo::shouldIgnoreForPipelining
virtual bool shouldIgnoreForPipelining(const MachineInstr *MI) const =0
Return true if the given instruction should not be pipelined and should be ignored.
LaneBitmask.h
llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:370
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
MachineMemOperand.h
llvm::SmallVectorImpl< const Value * >
MachineOperand.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:354
pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:74
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:160
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
MachinePipeliner.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:135
llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:588
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
llvm::cl::desc
Definition: CommandLine.h:405
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
raw_ostream.h
llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2434
llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:581
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:51
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineFunction.h
llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1045
llvm::MDString
A single uniqued string.
Definition: Metadata.h:612
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:2900
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:76
llvm::ResourceManager
Definition: MachinePipeliner.h:443
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:619
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:488
llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:219
TargetRegisterInfo.h
Debug.h
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:280
llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:572
llvm::SmallSet::contains
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:238
SetVector.h
llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:497
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:285
MachineDominators.h
llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:273
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365