LLVM  13.0.0git
MachinePipeliner.cpp
Go to the documentation of this file.
1 //===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
10 //
11 // This SMS implementation is a target-independent back-end pass. When enabled,
12 // the pass runs just prior to the register allocation pass, while the machine
13 // IR is in SSA form. If software pipelining is successful, then the original
14 // loop is replaced by the optimized loop. The optimized loop contains one or
15 // more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
16 // the instructions cannot be scheduled in a given MII, we increase the MII by
17 // one and try again.
18 //
19 // The SMS implementation is an extension of the ScheduleDAGInstrs class. We
20 // represent loop carried dependences in the DAG as order edges to the Phi
21 // nodes. We also perform several passes over the DAG to eliminate unnecessary
22 // edges that inhibit the ability to pipeline. The implementation uses the
23 // DFAPacketizer class to compute the minimum initiation interval and the check
24 // where an instruction may be inserted in the pipelined schedule.
25 //
26 // In order for the SMS pass to work, several target specific hooks need to be
27 // implemented to get information about the loop structure and to rewrite
28 // instructions.
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/BitVector.h"
34 #include "llvm/ADT/DenseMap.h"
35 #include "llvm/ADT/MapVector.h"
36 #include "llvm/ADT/PriorityQueue.h"
37 #include "llvm/ADT/SetOperations.h"
38 #include "llvm/ADT/SetVector.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/SmallSet.h"
41 #include "llvm/ADT/SmallVector.h"
42 #include "llvm/ADT/Statistic.h"
67 #include "llvm/Config/llvm-config.h"
68 #include "llvm/IR/Attributes.h"
69 #include "llvm/IR/DebugLoc.h"
70 #include "llvm/IR/Function.h"
71 #include "llvm/MC/LaneBitmask.h"
72 #include "llvm/MC/MCInstrDesc.h"
74 #include "llvm/MC/MCRegisterInfo.h"
75 #include "llvm/Pass.h"
77 #include "llvm/Support/Compiler.h"
78 #include "llvm/Support/Debug.h"
81 #include <algorithm>
82 #include <cassert>
83 #include <climits>
84 #include <cstdint>
85 #include <deque>
86 #include <functional>
87 #include <iterator>
88 #include <map>
89 #include <memory>
90 #include <tuple>
91 #include <utility>
92 #include <vector>
93 
94 using namespace llvm;
95 
96 #define DEBUG_TYPE "pipeliner"
97 
98 STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
99 STATISTIC(NumPipelined, "Number of loops software pipelined");
100 STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
101 STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
102 STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
103 STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
104 STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
105 STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
106 STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
107 STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
108 STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
109 
110 /// A command line option to turn software pipelining on or off.
111 static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
113  cl::desc("Enable Software Pipelining"));
114 
115 /// A command line option to enable SWP at -Os.
116 static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
117  cl::desc("Enable SWP at Os."), cl::Hidden,
118  cl::init(false));
119 
120 /// A command line argument to limit minimum initial interval for pipelining.
121 static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
122  cl::desc("Size limit for the MII."),
123  cl::Hidden, cl::init(27));
124 
125 /// A command line argument to limit the number of stages in the pipeline.
126 static cl::opt<int>
127  SwpMaxStages("pipeliner-max-stages",
128  cl::desc("Maximum stages allowed in the generated scheduled."),
129  cl::Hidden, cl::init(3));
130 
131 /// A command line option to disable the pruning of chain dependences due to
132 /// an unrelated Phi.
133 static cl::opt<bool>
134  SwpPruneDeps("pipeliner-prune-deps",
135  cl::desc("Prune dependences between unrelated Phi nodes."),
136  cl::Hidden, cl::init(true));
137 
138 /// A command line option to disable the pruning of loop carried order
139 /// dependences.
140 static cl::opt<bool>
141  SwpPruneLoopCarried("pipeliner-prune-loop-carried",
142  cl::desc("Prune loop carried order dependences."),
143  cl::Hidden, cl::init(true));
144 
145 #ifndef NDEBUG
146 static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
147 #endif
148 
149 static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
150  cl::ReallyHidden, cl::init(false),
151  cl::ZeroOrMore, cl::desc("Ignore RecMII"));
152 
153 static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
154  cl::init(false));
155 static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
156  cl::init(false));
157 
159  "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
160  cl::desc("Instead of emitting the pipelined code, annotate instructions "
161  "with the generated schedule for feeding into the "
162  "-modulo-schedule-test pass"));
163 
165  "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
166  cl::desc(
167  "Use the experimental peeling code generator for software pipelining"));
168 
169 namespace llvm {
170 
171 // A command line option to enable the CopyToPhi DAG mutation.
173  SwpEnableCopyToPhi("pipeliner-enable-copytophi", cl::ReallyHidden,
174  cl::init(true), cl::ZeroOrMore,
175  cl::desc("Enable CopyToPhi DAG Mutation"));
176 
177 } // end namespace llvm
178 
179 unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
180 char MachinePipeliner::ID = 0;
181 #ifndef NDEBUG
183 #endif
185 
187  "Modulo Software Pipelining", false, false)
193  "Modulo Software Pipelining", false, false)
194 
195 /// The "main" function for implementing Swing Modulo Scheduling.
196 bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
197  if (skipFunction(mf.getFunction()))
198  return false;
199 
200  if (!EnableSWP)
201  return false;
202 
203  if (mf.getFunction().getAttributes().hasAttribute(
204  AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
206  return false;
207 
208  if (!mf.getSubtarget().enableMachinePipeliner())
209  return false;
210 
211  // Cannot pipeline loops without instruction itineraries if we are using
212  // DFA for the pipeliner.
213  if (mf.getSubtarget().useDFAforSMS() &&
214  (!mf.getSubtarget().getInstrItineraryData() ||
215  mf.getSubtarget().getInstrItineraryData()->isEmpty()))
216  return false;
217 
218  MF = &mf;
219  MLI = &getAnalysis<MachineLoopInfo>();
220  MDT = &getAnalysis<MachineDominatorTree>();
221  ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
222  TII = MF->getSubtarget().getInstrInfo();
223  RegClassInfo.runOnMachineFunction(*MF);
224 
225  for (auto &L : *MLI)
226  scheduleLoop(*L);
227 
228  return false;
229 }
230 
231 /// Attempt to perform the SMS algorithm on the specified loop. This function is
232 /// the main entry point for the algorithm. The function identifies candidate
233 /// loops, calculates the minimum initiation interval, and attempts to schedule
234 /// the loop.
235 bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
236  bool Changed = false;
237  for (auto &InnerLoop : L)
238  Changed |= scheduleLoop(*InnerLoop);
239 
240 #ifndef NDEBUG
241  // Stop trying after reaching the limit (if any).
242  int Limit = SwpLoopLimit;
243  if (Limit >= 0) {
244  if (NumTries >= SwpLoopLimit)
245  return Changed;
246  NumTries++;
247  }
248 #endif
249 
250  setPragmaPipelineOptions(L);
251  if (!canPipelineLoop(L)) {
252  LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
253  ORE->emit([&]() {
254  return MachineOptimizationRemarkMissed(DEBUG_TYPE, "canPipelineLoop",
255  L.getStartLoc(), L.getHeader())
256  << "Failed to pipeline loop";
257  });
258 
259  return Changed;
260  }
261 
262  ++NumTrytoPipeline;
263 
264  Changed = swingModuloScheduler(L);
265 
266  return Changed;
267 }
268 
269 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
270  // Reset the pragma for the next loop in iteration.
271  disabledByPragma = false;
272  II_setByPragma = 0;
273 
274  MachineBasicBlock *LBLK = L.getTopBlock();
275 
276  if (LBLK == nullptr)
277  return;
278 
279  const BasicBlock *BBLK = LBLK->getBasicBlock();
280  if (BBLK == nullptr)
281  return;
282 
283  const Instruction *TI = BBLK->getTerminator();
284  if (TI == nullptr)
285  return;
286 
287  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
288  if (LoopID == nullptr)
289  return;
290 
291  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
292  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
293 
294  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
295  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
296 
297  if (MD == nullptr)
298  continue;
299 
300  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
301 
302  if (S == nullptr)
303  continue;
304 
305  if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
306  assert(MD->getNumOperands() == 2 &&
307  "Pipeline initiation interval hint metadata should have two operands.");
309  mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
310  assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
311  } else if (S->getString() == "llvm.loop.pipeline.disable") {
312  disabledByPragma = true;
313  }
314  }
315 }
316 
317 /// Return true if the loop can be software pipelined. The algorithm is
318 /// restricted to loops with a single basic block. Make sure that the
319 /// branch in the loop can be analyzed.
320 bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
321  if (L.getNumBlocks() != 1) {
322  ORE->emit([&]() {
323  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
324  L.getStartLoc(), L.getHeader())
325  << "Not a single basic block: "
326  << ore::NV("NumBlocks", L.getNumBlocks());
327  });
328  return false;
329  }
330 
331  if (disabledByPragma) {
332  ORE->emit([&]() {
333  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
334  L.getStartLoc(), L.getHeader())
335  << "Disabled by Pragma.";
336  });
337  return false;
338  }
339 
340  // Check if the branch can't be understood because we can't do pipelining
341  // if that's the case.
342  LI.TBB = nullptr;
343  LI.FBB = nullptr;
344  LI.BrCond.clear();
345  if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
346  LLVM_DEBUG(dbgs() << "Unable to analyzeBranch, can NOT pipeline Loop\n");
347  NumFailBranch++;
348  ORE->emit([&]() {
349  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
350  L.getStartLoc(), L.getHeader())
351  << "The branch can't be understood";
352  });
353  return false;
354  }
355 
356  LI.LoopInductionVar = nullptr;
357  LI.LoopCompare = nullptr;
359  LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
360  NumFailLoop++;
361  ORE->emit([&]() {
362  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
363  L.getStartLoc(), L.getHeader())
364  << "The loop structure is not supported";
365  });
366  return false;
367  }
368 
369  if (!L.getLoopPreheader()) {
370  LLVM_DEBUG(dbgs() << "Preheader not found, can NOT pipeline Loop\n");
371  NumFailPreheader++;
372  ORE->emit([&]() {
373  return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
374  L.getStartLoc(), L.getHeader())
375  << "No loop preheader found";
376  });
377  return false;
378  }
379 
380  // Remove any subregisters from inputs to phi nodes.
381  preprocessPhiNodes(*L.getHeader());
382  return true;
383 }
384 
385 void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
387  SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
388 
389  for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
390  MachineOperand &DefOp = PI.getOperand(0);
391  assert(DefOp.getSubReg() == 0);
392  auto *RC = MRI.getRegClass(DefOp.getReg());
393 
394  for (unsigned i = 1, n = PI.getNumOperands(); i != n; i += 2) {
395  MachineOperand &RegOp = PI.getOperand(i);
396  if (RegOp.getSubReg() == 0)
397  continue;
398 
399  // If the operand uses a subregister, replace it with a new register
400  // without subregisters, and generate a copy to the new register.
401  Register NewReg = MRI.createVirtualRegister(RC);
402  MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
404  const DebugLoc &DL = PredB.findDebugLoc(At);
405  auto Copy = BuildMI(PredB, At, DL, TII->get(TargetOpcode::COPY), NewReg)
406  .addReg(RegOp.getReg(), getRegState(RegOp),
407  RegOp.getSubReg());
408  Slots.insertMachineInstrInMaps(*Copy);
409  RegOp.setReg(NewReg);
410  RegOp.setSubReg(0);
411  }
412  }
413 }
414 
415 /// The SMS algorithm consists of the following main steps:
416 /// 1. Computation and analysis of the dependence graph.
417 /// 2. Ordering of the nodes (instructions).
418 /// 3. Attempt to Schedule the loop.
419 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
420  assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
421 
422  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
424 
426  // The kernel should not include any terminator instructions. These
427  // will be added back later.
428  SMS.startBlock(MBB);
429 
430  // Compute the number of 'real' instructions in the basic block by
431  // ignoring terminators.
432  unsigned size = MBB->size();
434  E = MBB->instr_end();
435  I != E; ++I, --size)
436  ;
437 
438  SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
439  SMS.schedule();
440  SMS.exitRegion();
441 
442  SMS.finishBlock();
443  return SMS.hasNewSchedule();
444 }
445 
454 }
455 
456 void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
457  if (II_setByPragma > 0)
458  MII = II_setByPragma;
459  else
460  MII = std::max(ResMII, RecMII);
461 }
462 
463 void SwingSchedulerDAG::setMAX_II() {
464  if (II_setByPragma > 0)
465  MAX_II = II_setByPragma;
466  else
467  MAX_II = MII + 10;
468 }
469 
470 /// We override the schedule function in ScheduleDAGInstrs to implement the
471 /// scheduling part of the Swing Modulo Scheduling algorithm.
473  AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
474  buildSchedGraph(AA);
475  addLoopCarriedDependences(AA);
476  updatePhiDependences();
478  changeDependences();
479  postprocessDAG();
480  LLVM_DEBUG(dump());
481 
482  NodeSetType NodeSets;
483  findCircuits(NodeSets);
484  NodeSetType Circuits = NodeSets;
485 
486  // Calculate the MII.
487  unsigned ResMII = calculateResMII();
488  unsigned RecMII = calculateRecMII(NodeSets);
489 
490  fuseRecs(NodeSets);
491 
492  // This flag is used for testing and can cause correctness problems.
493  if (SwpIgnoreRecMII)
494  RecMII = 0;
495 
496  setMII(ResMII, RecMII);
497  setMAX_II();
498 
499  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
500  << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
501 
502  // Can't schedule a loop without a valid MII.
503  if (MII == 0) {
504  LLVM_DEBUG(dbgs() << "Invalid Minimal Initiation Interval: 0\n");
505  NumFailZeroMII++;
506  Pass.ORE->emit([&]() {
508  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
509  << "Invalid Minimal Initiation Interval: 0";
510  });
511  return;
512  }
513 
514  // Don't pipeline large loops.
515  if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
516  LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
517  << ", we don't pipleline large loops\n");
518  NumFailLargeMaxMII++;
519  Pass.ORE->emit([&]() {
521  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
522  << "Minimal Initiation Interval too large: "
523  << ore::NV("MII", (int)MII) << " > "
524  << ore::NV("SwpMaxMii", SwpMaxMii) << "."
525  << "Refer to -pipeliner-max-mii.";
526  });
527  return;
528  }
529 
530  computeNodeFunctions(NodeSets);
531 
532  registerPressureFilter(NodeSets);
533 
534  colocateNodeSets(NodeSets);
535 
536  checkNodeSets(NodeSets);
537 
538  LLVM_DEBUG({
539  for (auto &I : NodeSets) {
540  dbgs() << " Rec NodeSet ";
541  I.dump();
542  }
543  });
544 
545  llvm::stable_sort(NodeSets, std::greater<NodeSet>());
546 
547  groupRemainingNodes(NodeSets);
548 
549  removeDuplicateNodes(NodeSets);
550 
551  LLVM_DEBUG({
552  for (auto &I : NodeSets) {
553  dbgs() << " NodeSet ";
554  I.dump();
555  }
556  });
557 
558  computeNodeOrder(NodeSets);
559 
560  // check for node order issues
561  checkValidNodeOrder(Circuits);
562 
563  SMSchedule Schedule(Pass.MF);
564  Scheduled = schedulePipeline(Schedule);
565 
566  if (!Scheduled){
567  LLVM_DEBUG(dbgs() << "No schedule found, return\n");
568  NumFailNoSchedule++;
569  Pass.ORE->emit([&]() {
571  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
572  << "Unable to find schedule";
573  });
574  return;
575  }
576 
577  unsigned numStages = Schedule.getMaxStageCount();
578  // No need to generate pipeline if there are no overlapped iterations.
579  if (numStages == 0) {
580  LLVM_DEBUG(dbgs() << "No overlapped iterations, skip.\n");
581  NumFailZeroStage++;
582  Pass.ORE->emit([&]() {
584  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
585  << "No need to pipeline - no overlapped iterations in schedule.";
586  });
587  return;
588  }
589  // Check that the maximum stage count is less than user-defined limit.
590  if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
591  LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
592  << " : too many stages, abort\n");
593  NumFailLargeMaxStage++;
594  Pass.ORE->emit([&]() {
596  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
597  << "Too many stages in schedule: "
598  << ore::NV("numStages", (int)numStages) << " > "
599  << ore::NV("SwpMaxStages", SwpMaxStages)
600  << ". Refer to -pipeliner-max-stages.";
601  });
602  return;
603  }
604 
605  Pass.ORE->emit([&]() {
607  Loop.getHeader())
608  << "Pipelined succesfully!";
609  });
610 
611  // Generate the schedule as a ModuloSchedule.
612  DenseMap<MachineInstr *, int> Cycles, Stages;
613  std::vector<MachineInstr *> OrderedInsts;
614  for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
615  ++Cycle) {
616  for (SUnit *SU : Schedule.getInstructions(Cycle)) {
617  OrderedInsts.push_back(SU->getInstr());
618  Cycles[SU->getInstr()] = Cycle;
619  Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
620  }
621  }
623  for (auto &KV : NewMIs) {
624  Cycles[KV.first] = Cycles[KV.second];
625  Stages[KV.first] = Stages[KV.second];
626  NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
627  }
628 
629  ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
630  std::move(Stages));
631  if (EmitTestAnnotations) {
632  assert(NewInstrChanges.empty() &&
633  "Cannot serialize a schedule with InstrChanges!");
635  MSTI.annotate();
636  return;
637  }
638  // The experimental code generator can't work if there are InstChanges.
639  if (ExperimentalCodeGen && NewInstrChanges.empty()) {
640  PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
641  MSE.expand();
642  } else {
643  ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
644  MSE.expand();
645  MSE.cleanup();
646  }
647  ++NumPipelined;
648 }
649 
650 /// Clean up after the software pipeliner runs.
652  for (auto &KV : NewMIs)
653  MF.DeleteMachineInstr(KV.second);
654  NewMIs.clear();
655 
656  // Call the superclass.
658 }
659 
660 /// Return the register values for the operands of a Phi instruction.
661 /// This function assume the instruction is a Phi.
663  unsigned &InitVal, unsigned &LoopVal) {
664  assert(Phi.isPHI() && "Expecting a Phi.");
665 
666  InitVal = 0;
667  LoopVal = 0;
668  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
669  if (Phi.getOperand(i + 1).getMBB() != Loop)
670  InitVal = Phi.getOperand(i).getReg();
671  else
672  LoopVal = Phi.getOperand(i).getReg();
673 
674  assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
675 }
676 
677 /// Return the Phi register value that comes the loop block.
678 static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
679  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
680  if (Phi.getOperand(i + 1).getMBB() == LoopBB)
681  return Phi.getOperand(i).getReg();
682  return 0;
683 }
684 
685 /// Return true if SUb can be reached from SUa following the chain edges.
686 static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
687  SmallPtrSet<SUnit *, 8> Visited;
688  SmallVector<SUnit *, 8> Worklist;
689  Worklist.push_back(SUa);
690  while (!Worklist.empty()) {
691  const SUnit *SU = Worklist.pop_back_val();
692  for (auto &SI : SU->Succs) {
693  SUnit *SuccSU = SI.getSUnit();
694  if (SI.getKind() == SDep::Order) {
695  if (Visited.count(SuccSU))
696  continue;
697  if (SuccSU == SUb)
698  return true;
699  Worklist.push_back(SuccSU);
700  Visited.insert(SuccSU);
701  }
702  }
703  }
704  return false;
705 }
706 
707 /// Return true if the instruction causes a chain between memory
708 /// references before and after it.
710  return MI.isCall() || MI.mayRaiseFPException() ||
711  MI.hasUnmodeledSideEffects() ||
712  (MI.hasOrderedMemoryRef() &&
713  (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
714 }
715 
716 /// Return the underlying objects for the memory references of an instruction.
717 /// This function calls the code in ValueTracking, but first checks that the
718 /// instruction has a memory operand.
721  if (!MI->hasOneMemOperand())
722  return;
723  MachineMemOperand *MM = *MI->memoperands_begin();
724  if (!MM->getValue())
725  return;
726  getUnderlyingObjects(MM->getValue(), Objs);
727  for (const Value *V : Objs) {
728  if (!isIdentifiedObject(V)) {
729  Objs.clear();
730  return;
731  }
732  Objs.push_back(V);
733  }
734 }
735 
736 /// Add a chain edge between a load and store if the store can be an
737 /// alias of the load on a subsequent iteration, i.e., a loop carried
738 /// dependence. This code is very similar to the code in ScheduleDAGInstrs
739 /// but that code doesn't create loop carried dependences.
740 void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
744  for (auto &SU : SUnits) {
745  MachineInstr &MI = *SU.getInstr();
746  if (isDependenceBarrier(MI, AA))
747  PendingLoads.clear();
748  else if (MI.mayLoad()) {
750  ::getUnderlyingObjects(&MI, Objs);
751  if (Objs.empty())
752  Objs.push_back(UnknownValue);
753  for (auto V : Objs) {
754  SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
755  SUs.push_back(&SU);
756  }
757  } else if (MI.mayStore()) {
759  ::getUnderlyingObjects(&MI, Objs);
760  if (Objs.empty())
761  Objs.push_back(UnknownValue);
762  for (auto V : Objs) {
764  PendingLoads.find(V);
765  if (I == PendingLoads.end())
766  continue;
767  for (auto Load : I->second) {
768  if (isSuccOrder(Load, &SU))
769  continue;
770  MachineInstr &LdMI = *Load->getInstr();
771  // First, perform the cheaper check that compares the base register.
772  // If they are the same and the load offset is less than the store
773  // offset, then mark the dependence as loop carried potentially.
774  const MachineOperand *BaseOp1, *BaseOp2;
775  int64_t Offset1, Offset2;
776  bool Offset1IsScalable, Offset2IsScalable;
777  if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
778  Offset1IsScalable, TRI) &&
779  TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
780  Offset2IsScalable, TRI)) {
781  if (BaseOp1->isIdenticalTo(*BaseOp2) &&
782  Offset1IsScalable == Offset2IsScalable &&
783  (int)Offset1 < (int)Offset2) {
785  "What happened to the chain edge?");
786  SDep Dep(Load, SDep::Barrier);
787  Dep.setLatency(1);
788  SU.addPred(Dep);
789  continue;
790  }
791  }
792  // Second, the more expensive check that uses alias analysis on the
793  // base registers. If they alias, and the load offset is less than
794  // the store offset, the mark the dependence as loop carried.
795  if (!AA) {
796  SDep Dep(Load, SDep::Barrier);
797  Dep.setLatency(1);
798  SU.addPred(Dep);
799  continue;
800  }
801  MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
802  MachineMemOperand *MMO2 = *MI.memoperands_begin();
803  if (!MMO1->getValue() || !MMO2->getValue()) {
804  SDep Dep(Load, SDep::Barrier);
805  Dep.setLatency(1);
806  SU.addPred(Dep);
807  continue;
808  }
809  if (MMO1->getValue() == MMO2->getValue() &&
810  MMO1->getOffset() <= MMO2->getOffset()) {
811  SDep Dep(Load, SDep::Barrier);
812  Dep.setLatency(1);
813  SU.addPred(Dep);
814  continue;
815  }
816  if (!AA->isNoAlias(
817  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
819  MMO2->getAAInfo()))) {
820  SDep Dep(Load, SDep::Barrier);
821  Dep.setLatency(1);
822  SU.addPred(Dep);
823  }
824  }
825  }
826  }
827  }
828 }
829 
830 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
831 /// processes dependences for PHIs. This function adds true dependences
832 /// from a PHI to a use, and a loop carried dependence from the use to the
833 /// PHI. The loop carried dependence is represented as an anti dependence
834 /// edge. This function also removes chain dependences between unrelated
835 /// PHIs.
836 void SwingSchedulerDAG::updatePhiDependences() {
837  SmallVector<SDep, 4> RemoveDeps;
839 
840  // Iterate over each DAG node.
841  for (SUnit &I : SUnits) {
842  RemoveDeps.clear();
843  // Set to true if the instruction has an operand defined by a Phi.
844  unsigned HasPhiUse = 0;
845  unsigned HasPhiDef = 0;
846  MachineInstr *MI = I.getInstr();
847  // Iterate over each operand, and we process the definitions.
848  for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
849  MOE = MI->operands_end();
850  MOI != MOE; ++MOI) {
851  if (!MOI->isReg())
852  continue;
853  Register Reg = MOI->getReg();
854  if (MOI->isDef()) {
855  // If the register is used by a Phi, then create an anti dependence.
857  UI = MRI.use_instr_begin(Reg),
858  UE = MRI.use_instr_end();
859  UI != UE; ++UI) {
860  MachineInstr *UseMI = &*UI;
861  SUnit *SU = getSUnit(UseMI);
862  if (SU != nullptr && UseMI->isPHI()) {
863  if (!MI->isPHI()) {
864  SDep Dep(SU, SDep::Anti, Reg);
865  Dep.setLatency(1);
866  I.addPred(Dep);
867  } else {
868  HasPhiDef = Reg;
869  // Add a chain edge to a dependent Phi that isn't an existing
870  // predecessor.
871  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
872  I.addPred(SDep(SU, SDep::Barrier));
873  }
874  }
875  }
876  } else if (MOI->isUse()) {
877  // If the register is defined by a Phi, then create a true dependence.
879  if (DefMI == nullptr)
880  continue;
881  SUnit *SU = getSUnit(DefMI);
882  if (SU != nullptr && DefMI->isPHI()) {
883  if (!MI->isPHI()) {
884  SDep Dep(SU, SDep::Data, Reg);
885  Dep.setLatency(0);
886  ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
887  I.addPred(Dep);
888  } else {
889  HasPhiUse = Reg;
890  // Add a chain edge to a dependent Phi that isn't an existing
891  // predecessor.
892  if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
893  I.addPred(SDep(SU, SDep::Barrier));
894  }
895  }
896  }
897  }
898  // Remove order dependences from an unrelated Phi.
899  if (!SwpPruneDeps)
900  continue;
901  for (auto &PI : I.Preds) {
902  MachineInstr *PMI = PI.getSUnit()->getInstr();
903  if (PMI->isPHI() && PI.getKind() == SDep::Order) {
904  if (I.getInstr()->isPHI()) {
905  if (PMI->getOperand(0).getReg() == HasPhiUse)
906  continue;
907  if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
908  continue;
909  }
910  RemoveDeps.push_back(PI);
911  }
912  }
913  for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
914  I.removePred(RemoveDeps[i]);
915  }
916 }
917 
918 /// Iterate over each DAG node and see if we can change any dependences
919 /// in order to reduce the recurrence MII.
920 void SwingSchedulerDAG::changeDependences() {
921  // See if an instruction can use a value from the previous iteration.
922  // If so, we update the base and offset of the instruction and change
923  // the dependences.
924  for (SUnit &I : SUnits) {
925  unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
926  int64_t NewOffset = 0;
927  if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
928  NewOffset))
929  continue;
930 
931  // Get the MI and SUnit for the instruction that defines the original base.
932  Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
933  MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
934  if (!DefMI)
935  continue;
936  SUnit *DefSU = getSUnit(DefMI);
937  if (!DefSU)
938  continue;
939  // Get the MI and SUnit for the instruction that defins the new base.
940  MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
941  if (!LastMI)
942  continue;
943  SUnit *LastSU = getSUnit(LastMI);
944  if (!LastSU)
945  continue;
946 
947  if (Topo.IsReachable(&I, LastSU))
948  continue;
949 
950  // Remove the dependence. The value now depends on a prior iteration.
952  for (const SDep &P : I.Preds)
953  if (P.getSUnit() == DefSU)
954  Deps.push_back(P);
955  for (int i = 0, e = Deps.size(); i != e; i++) {
956  Topo.RemovePred(&I, Deps[i].getSUnit());
957  I.removePred(Deps[i]);
958  }
959  // Remove the chain dependence between the instructions.
960  Deps.clear();
961  for (auto &P : LastSU->Preds)
962  if (P.getSUnit() == &I && P.getKind() == SDep::Order)
963  Deps.push_back(P);
964  for (int i = 0, e = Deps.size(); i != e; i++) {
965  Topo.RemovePred(LastSU, Deps[i].getSUnit());
966  LastSU->removePred(Deps[i]);
967  }
968 
969  // Add a dependence between the new instruction and the instruction
970  // that defines the new base.
971  SDep Dep(&I, SDep::Anti, NewBase);
972  Topo.AddPred(LastSU, &I);
973  LastSU->addPred(Dep);
974 
975  // Remember the base and offset information so that we can update the
976  // instruction during code generation.
977  InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
978  }
979 }
980 
981 namespace {
982 
983 // FuncUnitSorter - Comparison operator used to sort instructions by
984 // the number of functional unit choices.
985 struct FuncUnitSorter {
986  const InstrItineraryData *InstrItins;
987  const MCSubtargetInfo *STI;
989 
990  FuncUnitSorter(const TargetSubtargetInfo &TSI)
991  : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
992 
993  // Compute the number of functional unit alternatives needed
994  // at each stage, and take the minimum value. We prioritize the
995  // instructions by the least number of choices first.
996  unsigned minFuncUnits(const MachineInstr *Inst,
997  InstrStage::FuncUnits &F) const {
998  unsigned SchedClass = Inst->getDesc().getSchedClass();
999  unsigned min = UINT_MAX;
1000  if (InstrItins && !InstrItins->isEmpty()) {
1001  for (const InstrStage &IS :
1002  make_range(InstrItins->beginStage(SchedClass),
1003  InstrItins->endStage(SchedClass))) {
1004  InstrStage::FuncUnits funcUnits = IS.getUnits();
1005  unsigned numAlternatives = countPopulation(funcUnits);
1006  if (numAlternatives < min) {
1007  min = numAlternatives;
1008  F = funcUnits;
1009  }
1010  }
1011  return min;
1012  }
1013  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1014  const MCSchedClassDesc *SCDesc =
1015  STI->getSchedModel().getSchedClassDesc(SchedClass);
1016  if (!SCDesc->isValid())
1017  // No valid Schedule Class Desc for schedClass, should be
1018  // Pseudo/PostRAPseudo
1019  return min;
1020 
1021  for (const MCWriteProcResEntry &PRE :
1022  make_range(STI->getWriteProcResBegin(SCDesc),
1023  STI->getWriteProcResEnd(SCDesc))) {
1024  if (!PRE.Cycles)
1025  continue;
1026  const MCProcResourceDesc *ProcResource =
1027  STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
1028  unsigned NumUnits = ProcResource->NumUnits;
1029  if (NumUnits < min) {
1030  min = NumUnits;
1031  F = PRE.ProcResourceIdx;
1032  }
1033  }
1034  return min;
1035  }
1036  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1037  }
1038 
1039  // Compute the critical resources needed by the instruction. This
1040  // function records the functional units needed by instructions that
1041  // must use only one functional unit. We use this as a tie breaker
1042  // for computing the resource MII. The instrutions that require
1043  // the same, highly used, functional unit have high priority.
1044  void calcCriticalResources(MachineInstr &MI) {
1045  unsigned SchedClass = MI.getDesc().getSchedClass();
1046  if (InstrItins && !InstrItins->isEmpty()) {
1047  for (const InstrStage &IS :
1048  make_range(InstrItins->beginStage(SchedClass),
1049  InstrItins->endStage(SchedClass))) {
1050  InstrStage::FuncUnits FuncUnits = IS.getUnits();
1051  if (countPopulation(FuncUnits) == 1)
1052  Resources[FuncUnits]++;
1053  }
1054  return;
1055  }
1056  if (STI && STI->getSchedModel().hasInstrSchedModel()) {
1057  const MCSchedClassDesc *SCDesc =
1058  STI->getSchedModel().getSchedClassDesc(SchedClass);
1059  if (!SCDesc->isValid())
1060  // No valid Schedule Class Desc for schedClass, should be
1061  // Pseudo/PostRAPseudo
1062  return;
1063 
1064  for (const MCWriteProcResEntry &PRE :
1065  make_range(STI->getWriteProcResBegin(SCDesc),
1066  STI->getWriteProcResEnd(SCDesc))) {
1067  if (!PRE.Cycles)
1068  continue;
1069  Resources[PRE.ProcResourceIdx]++;
1070  }
1071  return;
1072  }
1073  llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
1074  }
1075 
1076  /// Return true if IS1 has less priority than IS2.
1077  bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
1078  InstrStage::FuncUnits F1 = 0, F2 = 0;
1079  unsigned MFUs1 = minFuncUnits(IS1, F1);
1080  unsigned MFUs2 = minFuncUnits(IS2, F2);
1081  if (MFUs1 == MFUs2)
1082  return Resources.lookup(F1) < Resources.lookup(F2);
1083  return MFUs1 > MFUs2;
1084  }
1085 };
1086 
1087 } // end anonymous namespace
1088 
1089 /// Calculate the resource constrained minimum initiation interval for the
1090 /// specified loop. We use the DFA to model the resources needed for
1091 /// each instruction, and we ignore dependences. A different DFA is created
1092 /// for each cycle that is required. When adding a new instruction, we attempt
1093 /// to add it to each existing DFA, until a legal space is found. If the
1094 /// instruction cannot be reserved in an existing DFA, we create a new one.
1095 unsigned SwingSchedulerDAG::calculateResMII() {
1096 
1097  LLVM_DEBUG(dbgs() << "calculateResMII:\n");
1100  Resources.push_back(new ResourceManager(&MF.getSubtarget()));
1101 
1102  // Sort the instructions by the number of available choices for scheduling,
1103  // least to most. Use the number of critical resources as the tie breaker.
1104  FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
1106  E = MBB->getFirstTerminator();
1107  I != E; ++I)
1108  FUS.calcCriticalResources(*I);
1110  FuncUnitOrder(FUS);
1111 
1113  E = MBB->getFirstTerminator();
1114  I != E; ++I)
1115  FuncUnitOrder.push(&*I);
1116 
1117  while (!FuncUnitOrder.empty()) {
1118  MachineInstr *MI = FuncUnitOrder.top();
1119  FuncUnitOrder.pop();
1120  if (TII->isZeroCost(MI->getOpcode()))
1121  continue;
1122  // Attempt to reserve the instruction in an existing DFA. At least one
1123  // DFA is needed for each cycle.
1124  unsigned NumCycles = getSUnit(MI)->Latency;
1125  unsigned ReservedCycles = 0;
1126  SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
1127  SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
1128  LLVM_DEBUG({
1129  dbgs() << "Trying to reserve resource for " << NumCycles
1130  << " cycles for \n";
1131  MI->dump();
1132  });
1133  for (unsigned C = 0; C < NumCycles; ++C)
1134  while (RI != RE) {
1135  if ((*RI)->canReserveResources(*MI)) {
1136  (*RI)->reserveResources(*MI);
1137  ++ReservedCycles;
1138  break;
1139  }
1140  RI++;
1141  }
1142  LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
1143  << ", NumCycles:" << NumCycles << "\n");
1144  // Add new DFAs, if needed, to reserve resources.
1145  for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
1147  << "NewResource created to reserve resources"
1148  << "\n");
1149  ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
1150  assert(NewResource->canReserveResources(*MI) && "Reserve error.");
1151  NewResource->reserveResources(*MI);
1152  Resources.push_back(NewResource);
1153  }
1154  }
1155  int Resmii = Resources.size();
1156  LLVM_DEBUG(dbgs() << "Return Res MII:" << Resmii << "\n");
1157  // Delete the memory for each of the DFAs that were created earlier.
1158  for (ResourceManager *RI : Resources) {
1159  ResourceManager *D = RI;
1160  delete D;
1161  }
1162  Resources.clear();
1163  return Resmii;
1164 }
1165 
1166 /// Calculate the recurrence-constrainted minimum initiation interval.
1167 /// Iterate over each circuit. Compute the delay(c) and distance(c)
1168 /// for each circuit. The II needs to satisfy the inequality
1169 /// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
1170 /// II that satisfies the inequality, and the RecMII is the maximum
1171 /// of those values.
1172 unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
1173  unsigned RecMII = 0;
1174 
1175  for (NodeSet &Nodes : NodeSets) {
1176  if (Nodes.empty())
1177  continue;
1178 
1179  unsigned Delay = Nodes.getLatency();
1180  unsigned Distance = 1;
1181 
1182  // ii = ceil(delay / distance)
1183  unsigned CurMII = (Delay + Distance - 1) / Distance;
1184  Nodes.setRecMII(CurMII);
1185  if (CurMII > RecMII)
1186  RecMII = CurMII;
1187  }
1188 
1189  return RecMII;
1190 }
1191 
1192 /// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1193 /// but we do this to find the circuits, and then change them back.
1194 static void swapAntiDependences(std::vector<SUnit> &SUnits) {
1196  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1197  SUnit *SU = &SUnits[i];
1198  for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
1199  IP != EP; ++IP) {
1200  if (IP->getKind() != SDep::Anti)
1201  continue;
1202  DepsAdded.push_back(std::make_pair(SU, *IP));
1203  }
1204  }
1205  for (std::pair<SUnit *, SDep> &P : DepsAdded) {
1206  // Remove this anti dependency and add one in the reverse direction.
1207  SUnit *SU = P.first;
1208  SDep &D = P.second;
1209  SUnit *TargetSU = D.getSUnit();
1210  unsigned Reg = D.getReg();
1211  unsigned Lat = D.getLatency();
1212  SU->removePred(D);
1213  SDep Dep(SU, SDep::Anti, Reg);
1214  Dep.setLatency(Lat);
1215  TargetSU->addPred(Dep);
1216  }
1217 }
1218 
1219 /// Create the adjacency structure of the nodes in the graph.
1220 void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
1221  SwingSchedulerDAG *DAG) {
1222  BitVector Added(SUnits.size());
1223  DenseMap<int, int> OutputDeps;
1224  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1225  Added.reset();
1226  // Add any successor to the adjacency matrix and exclude duplicates.
1227  for (auto &SI : SUnits[i].Succs) {
1228  // Only create a back-edge on the first and last nodes of a dependence
1229  // chain. This records any chains and adds them later.
1230  if (SI.getKind() == SDep::Output) {
1231  int N = SI.getSUnit()->NodeNum;
1232  int BackEdge = i;
1233  auto Dep = OutputDeps.find(BackEdge);
1234  if (Dep != OutputDeps.end()) {
1235  BackEdge = Dep->second;
1236  OutputDeps.erase(Dep);
1237  }
1238  OutputDeps[N] = BackEdge;
1239  }
1240  // Do not process a boundary node, an artificial node.
1241  // A back-edge is processed only if it goes to a Phi.
1242  if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
1243  (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
1244  continue;
1245  int N = SI.getSUnit()->NodeNum;
1246  if (!Added.test(N)) {
1247  AdjK[i].push_back(N);
1248  Added.set(N);
1249  }
1250  }
1251  // A chain edge between a store and a load is treated as a back-edge in the
1252  // adjacency matrix.
1253  for (auto &PI : SUnits[i].Preds) {
1254  if (!SUnits[i].getInstr()->mayStore() ||
1255  !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
1256  continue;
1257  if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
1258  int N = PI.getSUnit()->NodeNum;
1259  if (!Added.test(N)) {
1260  AdjK[i].push_back(N);
1261  Added.set(N);
1262  }
1263  }
1264  }
1265  }
1266  // Add back-edges in the adjacency matrix for the output dependences.
1267  for (auto &OD : OutputDeps)
1268  if (!Added.test(OD.second)) {
1269  AdjK[OD.first].push_back(OD.second);
1270  Added.set(OD.second);
1271  }
1272 }
1273 
1274 /// Identify an elementary circuit in the dependence graph starting at the
1275 /// specified node.
1276 bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
1277  bool HasBackedge) {
1278  SUnit *SV = &SUnits[V];
1279  bool F = false;
1280  Stack.insert(SV);
1281  Blocked.set(V);
1282 
1283  for (auto W : AdjK[V]) {
1284  if (NumPaths > MaxPaths)
1285  break;
1286  if (W < S)
1287  continue;
1288  if (W == S) {
1289  if (!HasBackedge)
1290  NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
1291  F = true;
1292  ++NumPaths;
1293  break;
1294  } else if (!Blocked.test(W)) {
1295  if (circuit(W, S, NodeSets,
1296  Node2Idx->at(W) < Node2Idx->at(V) ? true : HasBackedge))
1297  F = true;
1298  }
1299  }
1300 
1301  if (F)
1302  unblock(V);
1303  else {
1304  for (auto W : AdjK[V]) {
1305  if (W < S)
1306  continue;
1307  if (B[W].count(SV) == 0)
1308  B[W].insert(SV);
1309  }
1310  }
1311  Stack.pop_back();
1312  return F;
1313 }
1314 
1315 /// Unblock a node in the circuit finding algorithm.
1316 void SwingSchedulerDAG::Circuits::unblock(int U) {
1317  Blocked.reset(U);
1318  SmallPtrSet<SUnit *, 4> &BU = B[U];
1319  while (!BU.empty()) {
1321  assert(SI != BU.end() && "Invalid B set.");
1322  SUnit *W = *SI;
1323  BU.erase(W);
1324  if (Blocked.test(W->NodeNum))
1325  unblock(W->NodeNum);
1326  }
1327 }
1328 
1329 /// Identify all the elementary circuits in the dependence graph using
1330 /// Johnson's circuit algorithm.
1331 void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
1332  // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
1333  // but we do this to find the circuits, and then change them back.
1335 
1336  Circuits Cir(SUnits, Topo);
1337  // Create the adjacency structure.
1338  Cir.createAdjacencyStructure(this);
1339  for (int i = 0, e = SUnits.size(); i != e; ++i) {
1340  Cir.reset();
1341  Cir.circuit(i, i, NodeSets);
1342  }
1343 
1344  // Change the dependences back so that we've created a DAG again.
1346 }
1347 
1348 // Create artificial dependencies between the source of COPY/REG_SEQUENCE that
1349 // is loop-carried to the USE in next iteration. This will help pipeliner avoid
1350 // additional copies that are needed across iterations. An artificial dependence
1351 // edge is added from USE to SOURCE of COPY/REG_SEQUENCE.
1352 
1353 // PHI-------Anti-Dep-----> COPY/REG_SEQUENCE (loop-carried)
1354 // SRCOfCopY------True-Dep---> COPY/REG_SEQUENCE
1355 // PHI-------True-Dep------> USEOfPhi
1356 
1357 // The mutation creates
1358 // USEOfPHI -------Artificial-Dep---> SRCOfCopy
1359 
1360 // This overall will ensure, the USEOfPHI is scheduled before SRCOfCopy
1361 // (since USE is a predecessor), implies, the COPY/ REG_SEQUENCE is scheduled
1362 // late to avoid additional copies across iterations. The possible scheduling
1363 // order would be
1364 // USEOfPHI --- SRCOfCopy--- COPY/REG_SEQUENCE.
1365 
1367  for (SUnit &SU : DAG->SUnits) {
1368  // Find the COPY/REG_SEQUENCE instruction.
1369  if (!SU.getInstr()->isCopy() && !SU.getInstr()->isRegSequence())
1370  continue;
1371 
1372  // Record the loop carried PHIs.
1373  SmallVector<SUnit *, 4> PHISUs;
1374  // Record the SrcSUs that feed the COPY/REG_SEQUENCE instructions.
1375  SmallVector<SUnit *, 4> SrcSUs;
1376 
1377  for (auto &Dep : SU.Preds) {
1378  SUnit *TmpSU = Dep.getSUnit();
1379  MachineInstr *TmpMI = TmpSU->getInstr();
1380  SDep::Kind DepKind = Dep.getKind();
1381  // Save the loop carried PHI.
1382  if (DepKind == SDep::Anti && TmpMI->isPHI())
1383  PHISUs.push_back(TmpSU);
1384  // Save the source of COPY/REG_SEQUENCE.
1385  // If the source has no pre-decessors, we will end up creating cycles.
1386  else if (DepKind == SDep::Data && !TmpMI->isPHI() && TmpSU->NumPreds > 0)
1387  SrcSUs.push_back(TmpSU);
1388  }
1389 
1390  if (PHISUs.size() == 0 || SrcSUs.size() == 0)
1391  continue;
1392 
1393  // Find the USEs of PHI. If the use is a PHI or REG_SEQUENCE, push back this
1394  // SUnit to the container.
1395  SmallVector<SUnit *, 8> UseSUs;
1396  // Do not use iterator based loop here as we are updating the container.
1397  for (size_t Index = 0; Index < PHISUs.size(); ++Index) {
1398  for (auto &Dep : PHISUs[Index]->Succs) {
1399  if (Dep.getKind() != SDep::Data)
1400  continue;
1401 
1402  SUnit *TmpSU = Dep.getSUnit();
1403  MachineInstr *TmpMI = TmpSU->getInstr();
1404  if (TmpMI->isPHI() || TmpMI->isRegSequence()) {
1405  PHISUs.push_back(TmpSU);
1406  continue;
1407  }
1408  UseSUs.push_back(TmpSU);
1409  }
1410  }
1411 
1412  if (UseSUs.size() == 0)
1413  continue;
1414 
1415  SwingSchedulerDAG *SDAG = cast<SwingSchedulerDAG>(DAG);
1416  // Add the artificial dependencies if it does not form a cycle.
1417  for (auto I : UseSUs) {
1418  for (auto Src : SrcSUs) {
1419  if (!SDAG->Topo.IsReachable(I, Src) && Src != I) {
1420  Src->addPred(SDep(I, SDep::Artificial));
1421  SDAG->Topo.AddPred(Src, I);
1422  }
1423  }
1424  }
1425  }
1426 }
1427 
1428 /// Return true for DAG nodes that we ignore when computing the cost functions.
1429 /// We ignore the back-edge recurrence in order to avoid unbounded recursion
1430 /// in the calculation of the ASAP, ALAP, etc functions.
1431 static bool ignoreDependence(const SDep &D, bool isPred) {
1432  if (D.isArtificial())
1433  return true;
1434  return D.getKind() == SDep::Anti && isPred;
1435 }
1436 
1437 /// Compute several functions need to order the nodes for scheduling.
1438 /// ASAP - Earliest time to schedule a node.
1439 /// ALAP - Latest time to schedule a node.
1440 /// MOV - Mobility function, difference between ALAP and ASAP.
1441 /// D - Depth of each node.
1442 /// H - Height of each node.
1443 void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
1444  ScheduleInfo.resize(SUnits.size());
1445 
1446  LLVM_DEBUG({
1447  for (int I : Topo) {
1448  const SUnit &SU = SUnits[I];
1449  dumpNode(SU);
1450  }
1451  });
1452 
1453  int maxASAP = 0;
1454  // Compute ASAP and ZeroLatencyDepth.
1455  for (int I : Topo) {
1456  int asap = 0;
1457  int zeroLatencyDepth = 0;
1458  SUnit *SU = &SUnits[I];
1459  for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
1460  EP = SU->Preds.end();
1461  IP != EP; ++IP) {
1462  SUnit *pred = IP->getSUnit();
1463  if (IP->getLatency() == 0)
1464  zeroLatencyDepth =
1465  std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
1466  if (ignoreDependence(*IP, true))
1467  continue;
1468  asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
1469  getDistance(pred, SU, *IP) * MII));
1470  }
1471  maxASAP = std::max(maxASAP, asap);
1472  ScheduleInfo[I].ASAP = asap;
1473  ScheduleInfo[I].ZeroLatencyDepth = zeroLatencyDepth;
1474  }
1475 
1476  // Compute ALAP, ZeroLatencyHeight, and MOV.
1478  E = Topo.rend();
1479  I != E; ++I) {
1480  int alap = maxASAP;
1481  int zeroLatencyHeight = 0;
1482  SUnit *SU = &SUnits[*I];
1483  for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
1484  ES = SU->Succs.end();
1485  IS != ES; ++IS) {
1486  SUnit *succ = IS->getSUnit();
1487  if (IS->getLatency() == 0)
1488  zeroLatencyHeight =
1489  std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
1490  if (ignoreDependence(*IS, true))
1491  continue;
1492  alap = std::min(alap, (int)(getALAP(succ) - IS->getLatency() +
1493  getDistance(SU, succ, *IS) * MII));
1494  }
1495 
1496  ScheduleInfo[*I].ALAP = alap;
1497  ScheduleInfo[*I].ZeroLatencyHeight = zeroLatencyHeight;
1498  }
1499 
1500  // After computing the node functions, compute the summary for each node set.
1501  for (NodeSet &I : NodeSets)
1502  I.computeNodeSetInfo(this);
1503 
1504  LLVM_DEBUG({
1505  for (unsigned i = 0; i < SUnits.size(); i++) {
1506  dbgs() << "\tNode " << i << ":\n";
1507  dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
1508  dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
1509  dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
1510  dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
1511  dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
1512  dbgs() << "\t ZLD = " << getZeroLatencyDepth(&SUnits[i]) << "\n";
1513  dbgs() << "\t ZLH = " << getZeroLatencyHeight(&SUnits[i]) << "\n";
1514  }
1515  });
1516 }
1517 
1518 /// Compute the Pred_L(O) set, as defined in the paper. The set is defined
1519 /// as the predecessors of the elements of NodeOrder that are not also in
1520 /// NodeOrder.
1523  const NodeSet *S = nullptr) {
1524  Preds.clear();
1525  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1526  I != E; ++I) {
1527  for (const SDep &Pred : (*I)->Preds) {
1528  if (S && S->count(Pred.getSUnit()) == 0)
1529  continue;
1530  if (ignoreDependence(Pred, true))
1531  continue;
1532  if (NodeOrder.count(Pred.getSUnit()) == 0)
1533  Preds.insert(Pred.getSUnit());
1534  }
1535  // Back-edges are predecessors with an anti-dependence.
1536  for (const SDep &Succ : (*I)->Succs) {
1537  if (Succ.getKind() != SDep::Anti)
1538  continue;
1539  if (S && S->count(Succ.getSUnit()) == 0)
1540  continue;
1541  if (NodeOrder.count(Succ.getSUnit()) == 0)
1542  Preds.insert(Succ.getSUnit());
1543  }
1544  }
1545  return !Preds.empty();
1546 }
1547 
1548 /// Compute the Succ_L(O) set, as defined in the paper. The set is defined
1549 /// as the successors of the elements of NodeOrder that are not also in
1550 /// NodeOrder.
1553  const NodeSet *S = nullptr) {
1554  Succs.clear();
1555  for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
1556  I != E; ++I) {
1557  for (SDep &Succ : (*I)->Succs) {
1558  if (S && S->count(Succ.getSUnit()) == 0)
1559  continue;
1560  if (ignoreDependence(Succ, false))
1561  continue;
1562  if (NodeOrder.count(Succ.getSUnit()) == 0)
1563  Succs.insert(Succ.getSUnit());
1564  }
1565  for (SDep &Pred : (*I)->Preds) {
1566  if (Pred.getKind() != SDep::Anti)
1567  continue;
1568  if (S && S->count(Pred.getSUnit()) == 0)
1569  continue;
1570  if (NodeOrder.count(Pred.getSUnit()) == 0)
1571  Succs.insert(Pred.getSUnit());
1572  }
1573  }
1574  return !Succs.empty();
1575 }
1576 
1577 /// Return true if there is a path from the specified node to any of the nodes
1578 /// in DestNodes. Keep track and return the nodes in any path.
1579 static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
1580  SetVector<SUnit *> &DestNodes,
1581  SetVector<SUnit *> &Exclude,
1582  SmallPtrSet<SUnit *, 8> &Visited) {
1583  if (Cur->isBoundaryNode())
1584  return false;
1585  if (Exclude.contains(Cur))
1586  return false;
1587  if (DestNodes.contains(Cur))
1588  return true;
1589  if (!Visited.insert(Cur).second)
1590  return Path.contains(Cur);
1591  bool FoundPath = false;
1592  for (auto &SI : Cur->Succs)
1593  FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
1594  for (auto &PI : Cur->Preds)
1595  if (PI.getKind() == SDep::Anti)
1596  FoundPath |=
1597  computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
1598  if (FoundPath)
1599  Path.insert(Cur);
1600  return FoundPath;
1601 }
1602 
1603 /// Compute the live-out registers for the instructions in a node-set.
1604 /// The live-out registers are those that are defined in the node-set,
1605 /// but not used. Except for use operands of Phis.
1607  NodeSet &NS) {
1612  for (SUnit *SU : NS) {
1613  const MachineInstr *MI = SU->getInstr();
1614  if (MI->isPHI())
1615  continue;
1616  for (const MachineOperand &MO : MI->operands())
1617  if (MO.isReg() && MO.isUse()) {
1618  Register Reg = MO.getReg();
1620  Uses.insert(Reg);
1621  else if (MRI.isAllocatable(Reg))
1622  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1623  ++Units)
1624  Uses.insert(*Units);
1625  }
1626  }
1627  for (SUnit *SU : NS)
1628  for (const MachineOperand &MO : SU->getInstr()->operands())
1629  if (MO.isReg() && MO.isDef() && !MO.isDead()) {
1630  Register Reg = MO.getReg();
1632  if (!Uses.count(Reg))
1633  LiveOutRegs.push_back(RegisterMaskPair(Reg,
1635  } else if (MRI.isAllocatable(Reg)) {
1636  for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
1637  ++Units)
1638  if (!Uses.count(*Units))
1639  LiveOutRegs.push_back(RegisterMaskPair(*Units,
1641  }
1642  }
1643  RPTracker.addLiveRegs(LiveOutRegs);
1644 }
1645 
1646 /// A heuristic to filter nodes in recurrent node-sets if the register
1647 /// pressure of a set is too high.
1648 void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
1649  for (auto &NS : NodeSets) {
1650  // Skip small node-sets since they won't cause register pressure problems.
1651  if (NS.size() <= 2)
1652  continue;
1653  IntervalPressure RecRegPressure;
1654  RegPressureTracker RecRPTracker(RecRegPressure);
1655  RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
1656  computeLiveOuts(MF, RecRPTracker, NS);
1657  RecRPTracker.closeBottom();
1658 
1659  std::vector<SUnit *> SUnits(NS.begin(), NS.end());
1660  llvm::sort(SUnits, [](const SUnit *A, const SUnit *B) {
1661  return A->NodeNum > B->NodeNum;
1662  });
1663 
1664  for (auto &SU : SUnits) {
1665  // Since we're computing the register pressure for a subset of the
1666  // instructions in a block, we need to set the tracker for each
1667  // instruction in the node-set. The tracker is set to the instruction
1668  // just after the one we're interested in.
1670  RecRPTracker.setPos(std::next(CurInstI));
1671 
1672  RegPressureDelta RPDelta;
1673  ArrayRef<PressureChange> CriticalPSets;
1674  RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
1675  CriticalPSets,
1676  RecRegPressure.MaxSetPressure);
1677  if (RPDelta.Excess.isValid()) {
1678  LLVM_DEBUG(
1679  dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
1680  << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
1681  << ":" << RPDelta.Excess.getUnitInc());
1682  NS.setExceedPressure(SU);
1683  break;
1684  }
1685  RecRPTracker.recede();
1686  }
1687  }
1688 }
1689 
1690 /// A heuristic to colocate node sets that have the same set of
1691 /// successors.
1692 void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
1693  unsigned Colocate = 0;
1694  for (int i = 0, e = NodeSets.size(); i < e; ++i) {
1695  NodeSet &N1 = NodeSets[i];
1697  if (N1.empty() || !succ_L(N1, S1))
1698  continue;
1699  for (int j = i + 1; j < e; ++j) {
1700  NodeSet &N2 = NodeSets[j];
1701  if (N1.compareRecMII(N2) != 0)
1702  continue;
1704  if (N2.empty() || !succ_L(N2, S2))
1705  continue;
1706  if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
1707  N1.setColocate(++Colocate);
1708  N2.setColocate(Colocate);
1709  break;
1710  }
1711  }
1712  }
1713 }
1714 
1715 /// Check if the existing node-sets are profitable. If not, then ignore the
1716 /// recurrent node-sets, and attempt to schedule all nodes together. This is
1717 /// a heuristic. If the MII is large and all the recurrent node-sets are small,
1718 /// then it's best to try to schedule all instructions together instead of
1719 /// starting with the recurrent node-sets.
1720 void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
1721  // Look for loops with a large MII.
1722  if (MII < 17)
1723  return;
1724  // Check if the node-set contains only a simple add recurrence.
1725  for (auto &NS : NodeSets) {
1726  if (NS.getRecMII() > 2)
1727  return;
1728  if (NS.getMaxDepth() > MII)
1729  return;
1730  }
1731  NodeSets.clear();
1732  LLVM_DEBUG(dbgs() << "Clear recurrence node-sets\n");
1733 }
1734 
1735 /// Add the nodes that do not belong to a recurrence set into groups
1736 /// based upon connected componenets.
1737 void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
1738  SetVector<SUnit *> NodesAdded;
1739  SmallPtrSet<SUnit *, 8> Visited;
1740  // Add the nodes that are on a path between the previous node sets and
1741  // the current node set.
1742  for (NodeSet &I : NodeSets) {
1744  // Add the nodes from the current node set to the previous node set.
1745  if (succ_L(I, N)) {
1746  SetVector<SUnit *> Path;
1747  for (SUnit *NI : N) {
1748  Visited.clear();
1749  computePath(NI, Path, NodesAdded, I, Visited);
1750  }
1751  if (!Path.empty())
1752  I.insert(Path.begin(), Path.end());
1753  }
1754  // Add the nodes from the previous node set to the current node set.
1755  N.clear();
1756  if (succ_L(NodesAdded, N)) {
1757  SetVector<SUnit *> Path;
1758  for (SUnit *NI : N) {
1759  Visited.clear();
1760  computePath(NI, Path, I, NodesAdded, Visited);
1761  }
1762  if (!Path.empty())
1763  I.insert(Path.begin(), Path.end());
1764  }
1765  NodesAdded.insert(I.begin(), I.end());
1766  }
1767 
1768  // Create a new node set with the connected nodes of any successor of a node
1769  // in a recurrent set.
1770  NodeSet NewSet;
1772  if (succ_L(NodesAdded, N))
1773  for (SUnit *I : N)
1774  addConnectedNodes(I, NewSet, NodesAdded);
1775  if (!NewSet.empty())
1776  NodeSets.push_back(NewSet);
1777 
1778  // Create a new node set with the connected nodes of any predecessor of a node
1779  // in a recurrent set.
1780  NewSet.clear();
1781  if (pred_L(NodesAdded, N))
1782  for (SUnit *I : N)
1783  addConnectedNodes(I, NewSet, NodesAdded);
1784  if (!NewSet.empty())
1785  NodeSets.push_back(NewSet);
1786 
1787  // Create new nodes sets with the connected nodes any remaining node that
1788  // has no predecessor.
1789  for (SUnit &SU : SUnits) {
1790  if (NodesAdded.count(&SU) == 0) {
1791  NewSet.clear();
1792  addConnectedNodes(&SU, NewSet, NodesAdded);
1793  if (!NewSet.empty())
1794  NodeSets.push_back(NewSet);
1795  }
1796  }
1797 }
1798 
1799 /// Add the node to the set, and add all of its connected nodes to the set.
1800 void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
1801  SetVector<SUnit *> &NodesAdded) {
1802  NewSet.insert(SU);
1803  NodesAdded.insert(SU);
1804  for (auto &SI : SU->Succs) {
1805  SUnit *Successor = SI.getSUnit();
1806  if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
1807  addConnectedNodes(Successor, NewSet, NodesAdded);
1808  }
1809  for (auto &PI : SU->Preds) {
1810  SUnit *Predecessor = PI.getSUnit();
1811  if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
1812  addConnectedNodes(Predecessor, NewSet, NodesAdded);
1813  }
1814 }
1815 
1816 /// Return true if Set1 contains elements in Set2. The elements in common
1817 /// are returned in a different container.
1818 static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
1819  SmallSetVector<SUnit *, 8> &Result) {
1820  Result.clear();
1821  for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
1822  SUnit *SU = Set1[i];
1823  if (Set2.count(SU) != 0)
1824  Result.insert(SU);
1825  }
1826  return !Result.empty();
1827 }
1828 
1829 /// Merge the recurrence node sets that have the same initial node.
1830 void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
1831  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1832  ++I) {
1833  NodeSet &NI = *I;
1834  for (NodeSetType::iterator J = I + 1; J != E;) {
1835  NodeSet &NJ = *J;
1836  if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
1837  if (NJ.compareRecMII(NI) > 0)
1838  NI.setRecMII(NJ.getRecMII());
1839  for (SUnit *SU : *J)
1840  I->insert(SU);
1841  NodeSets.erase(J);
1842  E = NodeSets.end();
1843  } else {
1844  ++J;
1845  }
1846  }
1847  }
1848 }
1849 
1850 /// Remove nodes that have been scheduled in previous NodeSets.
1851 void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
1852  for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
1853  ++I)
1854  for (NodeSetType::iterator J = I + 1; J != E;) {
1855  J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
1856 
1857  if (J->empty()) {
1858  NodeSets.erase(J);
1859  E = NodeSets.end();
1860  } else {
1861  ++J;
1862  }
1863  }
1864 }
1865 
1866 /// Compute an ordered list of the dependence graph nodes, which
1867 /// indicates the order that the nodes will be scheduled. This is a
1868 /// two-level algorithm. First, a partial order is created, which
1869 /// consists of a list of sets ordered from highest to lowest priority.
1870 void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
1872  NodeOrder.clear();
1873 
1874  for (auto &Nodes : NodeSets) {
1875  LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
1876  OrderKind Order;
1878  if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1879  R.insert(N.begin(), N.end());
1880  Order = BottomUp;
1881  LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
1882  } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
1883  R.insert(N.begin(), N.end());
1884  Order = TopDown;
1885  LLVM_DEBUG(dbgs() << " Top down (succs) ");
1886  } else if (isIntersect(N, Nodes, R)) {
1887  // If some of the successors are in the existing node-set, then use the
1888  // top-down ordering.
1889  Order = TopDown;
1890  LLVM_DEBUG(dbgs() << " Top down (intersect) ");
1891  } else if (NodeSets.size() == 1) {
1892  for (auto &N : Nodes)
1893  if (N->Succs.size() == 0)
1894  R.insert(N);
1895  Order = BottomUp;
1896  LLVM_DEBUG(dbgs() << " Bottom up (all) ");
1897  } else {
1898  // Find the node with the highest ASAP.
1899  SUnit *maxASAP = nullptr;
1900  for (SUnit *SU : Nodes) {
1901  if (maxASAP == nullptr || getASAP(SU) > getASAP(maxASAP) ||
1902  (getASAP(SU) == getASAP(maxASAP) && SU->NodeNum > maxASAP->NodeNum))
1903  maxASAP = SU;
1904  }
1905  R.insert(maxASAP);
1906  Order = BottomUp;
1907  LLVM_DEBUG(dbgs() << " Bottom up (default) ");
1908  }
1909 
1910  while (!R.empty()) {
1911  if (Order == TopDown) {
1912  // Choose the node with the maximum height. If more than one, choose
1913  // the node wiTH the maximum ZeroLatencyHeight. If still more than one,
1914  // choose the node with the lowest MOV.
1915  while (!R.empty()) {
1916  SUnit *maxHeight = nullptr;
1917  for (SUnit *I : R) {
1918  if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
1919  maxHeight = I;
1920  else if (getHeight(I) == getHeight(maxHeight) &&
1922  maxHeight = I;
1923  else if (getHeight(I) == getHeight(maxHeight) &&
1925  getZeroLatencyHeight(maxHeight) &&
1926  getMOV(I) < getMOV(maxHeight))
1927  maxHeight = I;
1928  }
1929  NodeOrder.insert(maxHeight);
1930  LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
1931  R.remove(maxHeight);
1932  for (const auto &I : maxHeight->Succs) {
1933  if (Nodes.count(I.getSUnit()) == 0)
1934  continue;
1935  if (NodeOrder.contains(I.getSUnit()))
1936  continue;
1937  if (ignoreDependence(I, false))
1938  continue;
1939  R.insert(I.getSUnit());
1940  }
1941  // Back-edges are predecessors with an anti-dependence.
1942  for (const auto &I : maxHeight->Preds) {
1943  if (I.getKind() != SDep::Anti)
1944  continue;
1945  if (Nodes.count(I.getSUnit()) == 0)
1946  continue;
1947  if (NodeOrder.contains(I.getSUnit()))
1948  continue;
1949  R.insert(I.getSUnit());
1950  }
1951  }
1952  Order = BottomUp;
1953  LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
1955  if (pred_L(NodeOrder, N, &Nodes))
1956  R.insert(N.begin(), N.end());
1957  } else {
1958  // Choose the node with the maximum depth. If more than one, choose
1959  // the node with the maximum ZeroLatencyDepth. If still more than one,
1960  // choose the node with the lowest MOV.
1961  while (!R.empty()) {
1962  SUnit *maxDepth = nullptr;
1963  for (SUnit *I : R) {
1964  if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
1965  maxDepth = I;
1966  else if (getDepth(I) == getDepth(maxDepth) &&
1968  maxDepth = I;
1969  else if (getDepth(I) == getDepth(maxDepth) &&
1970  getZeroLatencyDepth(I) == getZeroLatencyDepth(maxDepth) &&
1971  getMOV(I) < getMOV(maxDepth))
1972  maxDepth = I;
1973  }
1974  NodeOrder.insert(maxDepth);
1975  LLVM_DEBUG(dbgs() << maxDepth->NodeNum << " ");
1976  R.remove(maxDepth);
1977  if (Nodes.isExceedSU(maxDepth)) {
1978  Order = TopDown;
1979  R.clear();
1980  R.insert(Nodes.getNode(0));
1981  break;
1982  }
1983  for (const auto &I : maxDepth->Preds) {
1984  if (Nodes.count(I.getSUnit()) == 0)
1985  continue;
1986  if (NodeOrder.contains(I.getSUnit()))
1987  continue;
1988  R.insert(I.getSUnit());
1989  }
1990  // Back-edges are predecessors with an anti-dependence.
1991  for (const auto &I : maxDepth->Succs) {
1992  if (I.getKind() != SDep::Anti)
1993  continue;
1994  if (Nodes.count(I.getSUnit()) == 0)
1995  continue;
1996  if (NodeOrder.contains(I.getSUnit()))
1997  continue;
1998  R.insert(I.getSUnit());
1999  }
2000  }
2001  Order = TopDown;
2002  LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
2004  if (succ_L(NodeOrder, N, &Nodes))
2005  R.insert(N.begin(), N.end());
2006  }
2007  }
2008  LLVM_DEBUG(dbgs() << "\nDone with Nodeset\n");
2009  }
2010 
2011  LLVM_DEBUG({
2012  dbgs() << "Node order: ";
2013  for (SUnit *I : NodeOrder)
2014  dbgs() << " " << I->NodeNum << " ";
2015  dbgs() << "\n";
2016  });
2017 }
2018 
2019 /// Process the nodes in the computed order and create the pipelined schedule
2020 /// of the instructions, if possible. Return true if a schedule is found.
2021 bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2022 
2023  if (NodeOrder.empty()){
2024  LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n" );
2025  return false;
2026  }
2027 
2028  bool scheduleFound = false;
2029  // Keep increasing II until a valid schedule is found.
2030  for (unsigned II = MII; II <= MAX_II && !scheduleFound; ++II) {
2031  Schedule.reset();
2032  Schedule.setInitiationInterval(II);
2033  LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
2034 
2035  SetVector<SUnit *>::iterator NI = NodeOrder.begin();
2036  SetVector<SUnit *>::iterator NE = NodeOrder.end();
2037  do {
2038  SUnit *SU = *NI;
2039 
2040  // Compute the schedule time for the instruction, which is based
2041  // upon the scheduled time for any predecessors/successors.
2042  int EarlyStart = INT_MIN;
2043  int LateStart = INT_MAX;
2044  // These values are set when the size of the schedule window is limited
2045  // due to chain dependences.
2046  int SchedEnd = INT_MAX;
2047  int SchedStart = INT_MIN;
2048  Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
2049  II, this);
2050  LLVM_DEBUG({
2051  dbgs() << "\n";
2052  dbgs() << "Inst (" << SU->NodeNum << ") ";
2053  SU->getInstr()->dump();
2054  dbgs() << "\n";
2055  });
2056  LLVM_DEBUG({
2057  dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
2058  LateStart, SchedEnd, SchedStart);
2059  });
2060 
2061  if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
2062  SchedStart > LateStart)
2063  scheduleFound = false;
2064  else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
2065  SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
2066  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2067  } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
2068  SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
2069  scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
2070  } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
2071  SchedEnd =
2072  std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
2073  // When scheduling a Phi it is better to start at the late cycle and go
2074  // backwards. The default order may insert the Phi too far away from
2075  // its first dependence.
2076  if (SU->getInstr()->isPHI())
2077  scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
2078  else
2079  scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
2080  } else {
2081  int FirstCycle = Schedule.getFirstCycle();
2082  scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
2083  FirstCycle + getASAP(SU) + II - 1, II);
2084  }
2085  // Even if we find a schedule, make sure the schedule doesn't exceed the
2086  // allowable number of stages. We keep trying if this happens.
2087  if (scheduleFound)
2088  if (SwpMaxStages > -1 &&
2089  Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
2090  scheduleFound = false;
2091 
2092  LLVM_DEBUG({
2093  if (!scheduleFound)
2094  dbgs() << "\tCan't schedule\n";
2095  });
2096  } while (++NI != NE && scheduleFound);
2097 
2098  // If a schedule is found, check if it is a valid schedule too.
2099  if (scheduleFound)
2100  scheduleFound = Schedule.isValidSchedule(this);
2101  }
2102 
2103  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound
2104  << " (II=" << Schedule.getInitiationInterval()
2105  << ")\n");
2106 
2107  if (scheduleFound) {
2108  Schedule.finalizeSchedule(this);
2109  Pass.ORE->emit([&]() {
2111  DEBUG_TYPE, "schedule", Loop.getStartLoc(), Loop.getHeader())
2112  << "Schedule found with Initiation Interval: "
2113  << ore::NV("II", Schedule.getInitiationInterval())
2114  << ", MaxStageCount: "
2115  << ore::NV("MaxStageCount", Schedule.getMaxStageCount());
2116  });
2117  } else
2118  Schedule.reset();
2119 
2120  return scheduleFound && Schedule.getMaxStageCount() > 0;
2121 }
2122 
2123 /// Return true if we can compute the amount the instruction changes
2124 /// during each iteration. Set Delta to the amount of the change.
2125 bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
2127  const MachineOperand *BaseOp;
2128  int64_t Offset;
2129  bool OffsetIsScalable;
2130  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
2131  return false;
2132 
2133  // FIXME: This algorithm assumes instructions have fixed-size offsets.
2134  if (OffsetIsScalable)
2135  return false;
2136 
2137  if (!BaseOp->isReg())
2138  return false;
2139 
2140  Register BaseReg = BaseOp->getReg();
2141 
2143  // Check if there is a Phi. If so, get the definition in the loop.
2144  MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
2145  if (BaseDef && BaseDef->isPHI()) {
2146  BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
2147  BaseDef = MRI.getVRegDef(BaseReg);
2148  }
2149  if (!BaseDef)
2150  return false;
2151 
2152  int D = 0;
2153  if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
2154  return false;
2155 
2156  Delta = D;
2157  return true;
2158 }
2159 
2160 /// Check if we can change the instruction to use an offset value from the
2161 /// previous iteration. If so, return true and set the base and offset values
2162 /// so that we can rewrite the load, if necessary.
2163 /// v1 = Phi(v0, v3)
2164 /// v2 = load v1, 0
2165 /// v3 = post_store v1, 4, x
2166 /// This function enables the load to be rewritten as v2 = load v3, 4.
2167 bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
2168  unsigned &BasePos,
2169  unsigned &OffsetPos,
2170  unsigned &NewBase,
2171  int64_t &Offset) {
2172  // Get the load instruction.
2173  if (TII->isPostIncrement(*MI))
2174  return false;
2175  unsigned BasePosLd, OffsetPosLd;
2176  if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
2177  return false;
2178  Register BaseReg = MI->getOperand(BasePosLd).getReg();
2179 
2180  // Look for the Phi instruction.
2181  MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
2182  MachineInstr *Phi = MRI.getVRegDef(BaseReg);
2183  if (!Phi || !Phi->isPHI())
2184  return false;
2185  // Get the register defined in the loop block.
2186  unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
2187  if (!PrevReg)
2188  return false;
2189 
2190  // Check for the post-increment load/store instruction.
2191  MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
2192  if (!PrevDef || PrevDef == MI)
2193  return false;
2194 
2195  if (!TII->isPostIncrement(*PrevDef))
2196  return false;
2197 
2198  unsigned BasePos1 = 0, OffsetPos1 = 0;
2199  if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
2200  return false;
2201 
2202  // Make sure that the instructions do not access the same memory location in
2203  // the next iteration.
2204  int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
2205  int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
2206  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2207  NewMI->getOperand(OffsetPosLd).setImm(LoadOffset + StoreOffset);
2208  bool Disjoint = TII->areMemAccessesTriviallyDisjoint(*NewMI, *PrevDef);
2209  MF.DeleteMachineInstr(NewMI);
2210  if (!Disjoint)
2211  return false;
2212 
2213  // Set the return value once we determine that we return true.
2214  BasePos = BasePosLd;
2215  OffsetPos = OffsetPosLd;
2216  NewBase = PrevReg;
2217  Offset = StoreOffset;
2218  return true;
2219 }
2220 
2221 /// Apply changes to the instruction if needed. The changes are need
2222 /// to improve the scheduling and depend up on the final schedule.
2224  SMSchedule &Schedule) {
2225  SUnit *SU = getSUnit(MI);
2227  InstrChanges.find(SU);
2228  if (It != InstrChanges.end()) {
2229  std::pair<unsigned, int64_t> RegAndOffset = It->second;
2230  unsigned BasePos, OffsetPos;
2231  if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2232  return;
2233  Register BaseReg = MI->getOperand(BasePos).getReg();
2234  MachineInstr *LoopDef = findDefInLoop(BaseReg);
2235  int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
2236  int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
2237  int BaseStageNum = Schedule.stageScheduled(SU);
2238  int BaseCycleNum = Schedule.cycleScheduled(SU);
2239  if (BaseStageNum < DefStageNum) {
2240  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2241  int OffsetDiff = DefStageNum - BaseStageNum;
2242  if (DefCycleNum < BaseCycleNum) {
2243  NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
2244  if (OffsetDiff > 0)
2245  --OffsetDiff;
2246  }
2247  int64_t NewOffset =
2248  MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
2249  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2250  SU->setInstr(NewMI);
2251  MISUnitMap[NewMI] = SU;
2252  NewMIs[MI] = NewMI;
2253  }
2254  }
2255 }
2256 
2257 /// Return the instruction in the loop that defines the register.
2258 /// If the definition is a Phi, then follow the Phi operand to
2259 /// the instruction in the loop.
2260 MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2263  while (Def->isPHI()) {
2264  if (!Visited.insert(Def).second)
2265  break;
2266  for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
2267  if (Def->getOperand(i + 1).getMBB() == BB) {
2268  Def = MRI.getVRegDef(Def->getOperand(i).getReg());
2269  break;
2270  }
2271  }
2272  return Def;
2273 }
2274 
2275 /// Return true for an order or output dependence that is loop carried
2276 /// potentially. A dependence is loop carried if the destination defines a valu
2277 /// that may be used or defined by the source in a subsequent iteration.
2279  bool isSucc) {
2280  if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
2281  Dep.isArtificial())
2282  return false;
2283 
2284  if (!SwpPruneLoopCarried)
2285  return true;
2286 
2287  if (Dep.getKind() == SDep::Output)
2288  return true;
2289 
2290  MachineInstr *SI = Source->getInstr();
2291  MachineInstr *DI = Dep.getSUnit()->getInstr();
2292  if (!isSucc)
2293  std::swap(SI, DI);
2294  assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
2295 
2296  // Assume ordered loads and stores may have a loop carried dependence.
2297  if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
2298  SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
2299  SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
2300  return true;
2301 
2302  // Only chain dependences between a load and store can be loop carried.
2303  if (!DI->mayStore() || !SI->mayLoad())
2304  return false;
2305 
2306  unsigned DeltaS, DeltaD;
2307  if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
2308  return true;
2309 
2310  const MachineOperand *BaseOpS, *BaseOpD;
2311  int64_t OffsetS, OffsetD;
2312  bool OffsetSIsScalable, OffsetDIsScalable;
2314  if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2315  TRI) ||
2316  !TII->getMemOperandWithOffset(*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2317  TRI))
2318  return true;
2319 
2320  assert(!OffsetSIsScalable && !OffsetDIsScalable &&
2321  "Expected offsets to be byte offsets");
2322 
2323  if (!BaseOpS->isIdenticalTo(*BaseOpD))
2324  return true;
2325 
2326  // Check that the base register is incremented by a constant value for each
2327  // iteration.
2328  MachineInstr *Def = MRI.getVRegDef(BaseOpS->getReg());
2329  if (!Def || !Def->isPHI())
2330  return true;
2331  unsigned InitVal = 0;
2332  unsigned LoopVal = 0;
2333  getPhiRegs(*Def, BB, InitVal, LoopVal);
2334  MachineInstr *LoopDef = MRI.getVRegDef(LoopVal);
2335  int D = 0;
2336  if (!LoopDef || !TII->getIncrementValue(*LoopDef, D))
2337  return true;
2338 
2339  uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
2340  uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
2341 
2342  // This is the main test, which checks the offset values and the loop
2343  // increment value to determine if the accesses may be loop carried.
2344  if (AccessSizeS == MemoryLocation::UnknownSize ||
2345  AccessSizeD == MemoryLocation::UnknownSize)
2346  return true;
2347 
2348  if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
2349  return true;
2350 
2351  return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
2352 }
2353 
2354 void SwingSchedulerDAG::postprocessDAG() {
2355  for (auto &M : Mutations)
2356  M->apply(this);
2357 }
2358 
2359 /// Try to schedule the node at the specified StartCycle and continue
2360 /// until the node is schedule or the EndCycle is reached. This function
2361 /// returns true if the node is scheduled. This routine may search either
2362 /// forward or backward for a place to insert the instruction based upon
2363 /// the relative values of StartCycle and EndCycle.
2364 bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
2365  bool forward = true;
2366  LLVM_DEBUG({
2367  dbgs() << "Trying to insert node between " << StartCycle << " and "
2368  << EndCycle << " II: " << II << "\n";
2369  });
2370  if (StartCycle > EndCycle)
2371  forward = false;
2372 
2373  // The terminating condition depends on the direction.
2374  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
2375  for (int curCycle = StartCycle; curCycle != termCycle;
2376  forward ? ++curCycle : --curCycle) {
2377 
2378  // Add the already scheduled instructions at the specified cycle to the
2379  // DFA.
2380  ProcItinResources.clearResources();
2381  for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
2382  checkCycle <= LastCycle; checkCycle += II) {
2383  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
2384 
2385  for (SUnit *CI : cycleInstrs) {
2386  if (ST.getInstrInfo()->isZeroCost(CI->getInstr()->getOpcode()))
2387  continue;
2388  assert(ProcItinResources.canReserveResources(*CI->getInstr()) &&
2389  "These instructions have already been scheduled.");
2390  ProcItinResources.reserveResources(*CI->getInstr());
2391  }
2392  }
2393  if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
2394  ProcItinResources.canReserveResources(*SU->getInstr())) {
2395  LLVM_DEBUG({
2396  dbgs() << "\tinsert at cycle " << curCycle << " ";
2397  SU->getInstr()->dump();
2398  });
2399 
2400  ScheduledInstrs[curCycle].push_back(SU);
2401  InstrToCycle.insert(std::make_pair(SU, curCycle));
2402  if (curCycle > LastCycle)
2403  LastCycle = curCycle;
2404  if (curCycle < FirstCycle)
2405  FirstCycle = curCycle;
2406  return true;
2407  }
2408  LLVM_DEBUG({
2409  dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
2410  SU->getInstr()->dump();
2411  });
2412  }
2413  return false;
2414 }
2415 
2416 // Return the cycle of the earliest scheduled instruction in the chain.
2418  SmallPtrSet<SUnit *, 8> Visited;
2419  SmallVector<SDep, 8> Worklist;
2420  Worklist.push_back(Dep);
2421  int EarlyCycle = INT_MAX;
2422  while (!Worklist.empty()) {
2423  const SDep &Cur = Worklist.pop_back_val();
2424  SUnit *PrevSU = Cur.getSUnit();
2425  if (Visited.count(PrevSU))
2426  continue;
2427  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
2428  if (it == InstrToCycle.end())
2429  continue;
2430  EarlyCycle = std::min(EarlyCycle, it->second);
2431  for (const auto &PI : PrevSU->Preds)
2432  if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
2433  Worklist.push_back(PI);
2434  Visited.insert(PrevSU);
2435  }
2436  return EarlyCycle;
2437 }
2438 
2439 // Return the cycle of the latest scheduled instruction in the chain.
2441  SmallPtrSet<SUnit *, 8> Visited;
2442  SmallVector<SDep, 8> Worklist;
2443  Worklist.push_back(Dep);
2444  int LateCycle = INT_MIN;
2445  while (!Worklist.empty()) {
2446  const SDep &Cur = Worklist.pop_back_val();
2447  SUnit *SuccSU = Cur.getSUnit();
2448  if (Visited.count(SuccSU))
2449  continue;
2450  std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
2451  if (it == InstrToCycle.end())
2452  continue;
2453  LateCycle = std::max(LateCycle, it->second);
2454  for (const auto &SI : SuccSU->Succs)
2455  if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
2456  Worklist.push_back(SI);
2457  Visited.insert(SuccSU);
2458  }
2459  return LateCycle;
2460 }
2461 
2462 /// If an instruction has a use that spans multiple iterations, then
2463 /// return true. These instructions are characterized by having a back-ege
2464 /// to a Phi, which contains a reference to another Phi.
2466  for (auto &P : SU->Preds)
2467  if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
2468  for (auto &S : P.getSUnit()->Succs)
2469  if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
2470  return P.getSUnit();
2471  return nullptr;
2472 }
2473 
2474 /// Compute the scheduling start slot for the instruction. The start slot
2475 /// depends on any predecessor or successor nodes scheduled already.
2476 void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
2477  int *MinEnd, int *MaxStart, int II,
2478  SwingSchedulerDAG *DAG) {
2479  // Iterate over each instruction that has been scheduled already. The start
2480  // slot computation depends on whether the previously scheduled instruction
2481  // is a predecessor or successor of the specified instruction.
2482  for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
2483 
2484  // Iterate over each instruction in the current cycle.
2485  for (SUnit *I : getInstructions(cycle)) {
2486  // Because we're processing a DAG for the dependences, we recognize
2487  // the back-edge in recurrences by anti dependences.
2488  for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
2489  const SDep &Dep = SU->Preds[i];
2490  if (Dep.getSUnit() == I) {
2491  if (!DAG->isBackedge(SU, Dep)) {
2492  int EarlyStart = cycle + Dep.getLatency() -
2493  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2494  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2495  if (DAG->isLoopCarriedDep(SU, Dep, false)) {
2496  int End = earliestCycleInChain(Dep) + (II - 1);
2497  *MinEnd = std::min(*MinEnd, End);
2498  }
2499  } else {
2500  int LateStart = cycle - Dep.getLatency() +
2501  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2502  *MinLateStart = std::min(*MinLateStart, LateStart);
2503  }
2504  }
2505  // For instruction that requires multiple iterations, make sure that
2506  // the dependent instruction is not scheduled past the definition.
2507  SUnit *BE = multipleIterations(I, DAG);
2508  if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
2509  !SU->isPred(I))
2510  *MinLateStart = std::min(*MinLateStart, cycle);
2511  }
2512  for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
2513  if (SU->Succs[i].getSUnit() == I) {
2514  const SDep &Dep = SU->Succs[i];
2515  if (!DAG->isBackedge(SU, Dep)) {
2516  int LateStart = cycle - Dep.getLatency() +
2517  DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
2518  *MinLateStart = std::min(*MinLateStart, LateStart);
2519  if (DAG->isLoopCarriedDep(SU, Dep)) {
2520  int Start = latestCycleInChain(Dep) + 1 - II;
2521  *MaxStart = std::max(*MaxStart, Start);
2522  }
2523  } else {
2524  int EarlyStart = cycle + Dep.getLatency() -
2525  DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
2526  *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
2527  }
2528  }
2529  }
2530  }
2531  }
2532 }
2533 
2534 /// Order the instructions within a cycle so that the definitions occur
2535 /// before the uses. Returns true if the instruction is added to the start
2536 /// of the list, or false if added to the end.
2538  std::deque<SUnit *> &Insts) {
2539  MachineInstr *MI = SU->getInstr();
2540  bool OrderBeforeUse = false;
2541  bool OrderAfterDef = false;
2542  bool OrderBeforeDef = false;
2543  unsigned MoveDef = 0;
2544  unsigned MoveUse = 0;
2545  int StageInst1 = stageScheduled(SU);
2546 
2547  unsigned Pos = 0;
2548  for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
2549  ++I, ++Pos) {
2550  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2551  MachineOperand &MO = MI->getOperand(i);
2552  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
2553  continue;
2554 
2555  Register Reg = MO.getReg();
2556  unsigned BasePos, OffsetPos;
2557  if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
2558  if (MI->getOperand(BasePos).getReg() == Reg)
2559  if (unsigned NewReg = SSD->getInstrBaseReg(SU))
2560  Reg = NewReg;
2561  bool Reads, Writes;
2562  std::tie(Reads, Writes) =
2563  (*I)->getInstr()->readsWritesVirtualRegister(Reg);
2564  if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
2565  OrderBeforeUse = true;
2566  if (MoveUse == 0)
2567  MoveUse = Pos;
2568  } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
2569  // Add the instruction after the scheduled instruction.
2570  OrderAfterDef = true;
2571  MoveDef = Pos;
2572  } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
2573  if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
2574  OrderBeforeUse = true;
2575  if (MoveUse == 0)
2576  MoveUse = Pos;
2577  } else {
2578  OrderAfterDef = true;
2579  MoveDef = Pos;
2580  }
2581  } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
2582  OrderBeforeUse = true;
2583  if (MoveUse == 0)
2584  MoveUse = Pos;
2585  if (MoveUse != 0) {
2586  OrderAfterDef = true;
2587  MoveDef = Pos - 1;
2588  }
2589  } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
2590  // Add the instruction before the scheduled instruction.
2591  OrderBeforeUse = true;
2592  if (MoveUse == 0)
2593  MoveUse = Pos;
2594  } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
2595  isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
2596  if (MoveUse == 0) {
2597  OrderBeforeDef = true;
2598  MoveUse = Pos;
2599  }
2600  }
2601  }
2602  // Check for order dependences between instructions. Make sure the source
2603  // is ordered before the destination.
2604  for (auto &S : SU->Succs) {
2605  if (S.getSUnit() != *I)
2606  continue;
2607  if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2608  OrderBeforeUse = true;
2609  if (Pos < MoveUse)
2610  MoveUse = Pos;
2611  }
2612  // We did not handle HW dependences in previous for loop,
2613  // and we normally set Latency = 0 for Anti deps,
2614  // so may have nodes in same cycle with Anti denpendent on HW regs.
2615  else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
2616  OrderBeforeUse = true;
2617  if ((MoveUse == 0) || (Pos < MoveUse))
2618  MoveUse = Pos;
2619  }
2620  }
2621  for (auto &P : SU->Preds) {
2622  if (P.getSUnit() != *I)
2623  continue;
2624  if (P.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
2625  OrderAfterDef = true;
2626  MoveDef = Pos;
2627  }
2628  }
2629  }
2630 
2631  // A circular dependence.
2632  if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
2633  OrderBeforeUse = false;
2634 
2635  // OrderAfterDef takes precedences over OrderBeforeDef. The latter is due
2636  // to a loop-carried dependence.
2637  if (OrderBeforeDef)
2638  OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
2639 
2640  // The uncommon case when the instruction order needs to be updated because
2641  // there is both a use and def.
2642  if (OrderBeforeUse && OrderAfterDef) {
2643  SUnit *UseSU = Insts.at(MoveUse);
2644  SUnit *DefSU = Insts.at(MoveDef);
2645  if (MoveUse > MoveDef) {
2646  Insts.erase(Insts.begin() + MoveUse);
2647  Insts.erase(Insts.begin() + MoveDef);
2648  } else {
2649  Insts.erase(Insts.begin() + MoveDef);
2650  Insts.erase(Insts.begin() + MoveUse);
2651  }
2652  orderDependence(SSD, UseSU, Insts);
2653  orderDependence(SSD, SU, Insts);
2654  orderDependence(SSD, DefSU, Insts);
2655  return;
2656  }
2657  // Put the new instruction first if there is a use in the list. Otherwise,
2658  // put it at the end of the list.
2659  if (OrderBeforeUse)
2660  Insts.push_front(SU);
2661  else
2662  Insts.push_back(SU);
2663 }
2664 
2665 /// Return true if the scheduled Phi has a loop carried operand.
2667  if (!Phi.isPHI())
2668  return false;
2669  assert(Phi.isPHI() && "Expecting a Phi.");
2670  SUnit *DefSU = SSD->getSUnit(&Phi);
2671  unsigned DefCycle = cycleScheduled(DefSU);
2672  int DefStage = stageScheduled(DefSU);
2673 
2674  unsigned InitVal = 0;
2675  unsigned LoopVal = 0;
2676  getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
2677  SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
2678  if (!UseSU)
2679  return true;
2680  if (UseSU->getInstr()->isPHI())
2681  return true;
2682  unsigned LoopCycle = cycleScheduled(UseSU);
2683  int LoopStage = stageScheduled(UseSU);
2684  return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
2685 }
2686 
2687 /// Return true if the instruction is a definition that is loop carried
2688 /// and defines the use on the next iteration.
2689 /// v1 = phi(v2, v3)
2690 /// (Def) v3 = op v1
2691 /// (MO) = v1
2692 /// If MO appears before Def, then then v1 and v3 may get assigned to the same
2693 /// register.
2696  if (!MO.isReg())
2697  return false;
2698  if (Def->isPHI())
2699  return false;
2700  MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
2701  if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
2702  return false;
2703  if (!isLoopCarried(SSD, *Phi))
2704  return false;
2705  unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
2706  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
2707  MachineOperand &DMO = Def->getOperand(i);
2708  if (!DMO.isReg() || !DMO.isDef())
2709  continue;
2710  if (DMO.getReg() == LoopReg)
2711  return true;
2712  }
2713  return false;
2714 }
2715 
2716 // Check if the generated schedule is valid. This function checks if
2717 // an instruction that uses a physical register is scheduled in a
2718 // different stage than the definition. The pipeliner does not handle
2719 // physical register values that may cross a basic block boundary.
2721  for (SUnit &SU : SSD->SUnits) {
2722  if (!SU.hasPhysRegDefs)
2723  continue;
2724  int StageDef = stageScheduled(&SU);
2725  assert(StageDef != -1 && "Instruction should have been scheduled.");
2726  for (auto &SI : SU.Succs)
2727  if (SI.isAssignedRegDep())
2728  if (Register::isPhysicalRegister(SI.getReg()))
2729  if (stageScheduled(SI.getSUnit()) != StageDef)
2730  return false;
2731  }
2732  return true;
2733 }
2734 
2735 /// A property of the node order in swing-modulo-scheduling is
2736 /// that for nodes outside circuits the following holds:
2737 /// none of them is scheduled after both a successor and a
2738 /// predecessor.
2739 /// The method below checks whether the property is met.
2740 /// If not, debug information is printed and statistics information updated.
2741 /// Note that we do not use an assert statement.
2742 /// The reason is that although an invalid node oder may prevent
2743 /// the pipeliner from finding a pipelined schedule for arbitrary II,
2744 /// it does not lead to the generation of incorrect code.
2745 void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
2746 
2747  // a sorted vector that maps each SUnit to its index in the NodeOrder
2748  typedef std::pair<SUnit *, unsigned> UnitIndex;
2749  std::vector<UnitIndex> Indices(NodeOrder.size(), std::make_pair(nullptr, 0));
2750 
2751  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i)
2752  Indices.push_back(std::make_pair(NodeOrder[i], i));
2753 
2754  auto CompareKey = [](UnitIndex i1, UnitIndex i2) {
2755  return std::get<0>(i1) < std::get<0>(i2);
2756  };
2757 
2758  // sort, so that we can perform a binary search
2759  llvm::sort(Indices, CompareKey);
2760 
2761  bool Valid = true;
2762  (void)Valid;
2763  // for each SUnit in the NodeOrder, check whether
2764  // it appears after both a successor and a predecessor
2765  // of the SUnit. If this is the case, and the SUnit
2766  // is not part of circuit, then the NodeOrder is not
2767  // valid.
2768  for (unsigned i = 0, s = NodeOrder.size(); i < s; ++i) {
2769  SUnit *SU = NodeOrder[i];
2770  unsigned Index = i;
2771 
2772  bool PredBefore = false;
2773  bool SuccBefore = false;
2774 
2775  SUnit *Succ;
2776  SUnit *Pred;
2777  (void)Succ;
2778  (void)Pred;
2779 
2780  for (SDep &PredEdge : SU->Preds) {
2781  SUnit *PredSU = PredEdge.getSUnit();
2782  unsigned PredIndex = std::get<1>(
2783  *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
2784  if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
2785  PredBefore = true;
2786  Pred = PredSU;
2787  break;
2788  }
2789  }
2790 
2791  for (SDep &SuccEdge : SU->Succs) {
2792  SUnit *SuccSU = SuccEdge.getSUnit();
2793  // Do not process a boundary node, it was not included in NodeOrder,
2794  // hence not in Indices either, call to std::lower_bound() below will
2795  // return Indices.end().
2796  if (SuccSU->isBoundaryNode())
2797  continue;
2798  unsigned SuccIndex = std::get<1>(
2799  *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
2800  if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
2801  SuccBefore = true;
2802  Succ = SuccSU;
2803  break;
2804  }
2805  }
2806 
2807  if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
2808  // instructions in circuits are allowed to be scheduled
2809  // after both a successor and predecessor.
2810  bool InCircuit = llvm::any_of(
2811  Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
2812  if (InCircuit)
2813  LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
2814  else {
2815  Valid = false;
2816  NumNodeOrderIssues++;
2817  LLVM_DEBUG(dbgs() << "Predecessor ";);
2818  }
2819  LLVM_DEBUG(dbgs() << Pred->NodeNum << " and successor " << Succ->NodeNum
2820  << " are scheduled before node " << SU->NodeNum
2821  << "\n";);
2822  }
2823  }
2824 
2825  LLVM_DEBUG({
2826  if (!Valid)
2827  dbgs() << "Invalid node order found!\n";
2828  });
2829 }
2830 
2831 /// Attempt to fix the degenerate cases when the instruction serialization
2832 /// causes the register lifetimes to overlap. For example,
2833 /// p' = store_pi(p, b)
2834 /// = load p, offset
2835 /// In this case p and p' overlap, which means that two registers are needed.
2836 /// Instead, this function changes the load to use p' and updates the offset.
2837 void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
2838  unsigned OverlapReg = 0;
2839  unsigned NewBaseReg = 0;
2840  for (SUnit *SU : Instrs) {
2841  MachineInstr *MI = SU->getInstr();
2842  for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
2843  const MachineOperand &MO = MI->getOperand(i);
2844  // Look for an instruction that uses p. The instruction occurs in the
2845  // same cycle but occurs later in the serialized order.
2846  if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
2847  // Check that the instruction appears in the InstrChanges structure,
2848  // which contains instructions that can have the offset updated.
2850  InstrChanges.find(SU);
2851  if (It != InstrChanges.end()) {
2852  unsigned BasePos, OffsetPos;
2853  // Update the base register and adjust the offset.
2854  if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
2855  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
2856  NewMI->getOperand(BasePos).setReg(NewBaseReg);
2857  int64_t NewOffset =
2858  MI->getOperand(OffsetPos).getImm() - It->second.second;
2859  NewMI->getOperand(OffsetPos).setImm(NewOffset);
2860  SU->setInstr(NewMI);
2861  MISUnitMap[NewMI] = SU;
2862  NewMIs[MI] = NewMI;
2863  }
2864  }
2865  OverlapReg = 0;
2866  NewBaseReg = 0;
2867  break;
2868  }
2869  // Look for an instruction of the form p' = op(p), which uses and defines
2870  // two virtual registers that get allocated to the same physical register.
2871  unsigned TiedUseIdx = 0;
2872  if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
2873  // OverlapReg is p in the example above.
2874  OverlapReg = MI->getOperand(TiedUseIdx).getReg();
2875  // NewBaseReg is p' in the example above.
2876  NewBaseReg = MI->getOperand(i).getReg();
2877  break;
2878  }
2879  }
2880  }
2881 }
2882 
2883 /// After the schedule has been formed, call this function to combine
2884 /// the instructions from the different stages/cycles. That is, this
2885 /// function creates a schedule that represents a single iteration.
2887  // Move all instructions to the first stage from later stages.
2888  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2889  for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
2890  ++stage) {
2891  std::deque<SUnit *> &cycleInstrs =
2892  ScheduledInstrs[cycle + (stage * InitiationInterval)];
2893  for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
2894  E = cycleInstrs.rend();
2895  I != E; ++I)
2896  ScheduledInstrs[cycle].push_front(*I);
2897  }
2898  }
2899 
2900  // Erase all the elements in the later stages. Only one iteration should
2901  // remain in the scheduled list, and it contains all the instructions.
2902  for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
2903  ScheduledInstrs.erase(cycle);
2904 
2905  // Change the registers in instruction as specified in the InstrChanges
2906  // map. We need to use the new registers to create the correct order.
2907  for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
2908  SUnit *SU = &SSD->SUnits[i];
2909  SSD->applyInstrChange(SU->getInstr(), *this);
2910  }
2911 
2912  // Reorder the instructions in each cycle to fix and improve the
2913  // generated code.
2914  for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
2915  std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
2916  std::deque<SUnit *> newOrderPhi;
2917  for (SUnit *SU : cycleInstrs) {
2918  if (SU->getInstr()->isPHI())
2919  newOrderPhi.push_back(SU);
2920  }
2921  std::deque<SUnit *> newOrderI;
2922  for (SUnit *SU : cycleInstrs) {
2923  if (!SU->getInstr()->isPHI())
2924  orderDependence(SSD, SU, newOrderI);
2925  }
2926  // Replace the old order with the new order.
2927  cycleInstrs.swap(newOrderPhi);
2928  llvm::append_range(cycleInstrs, newOrderI);
2929  SSD->fixupRegisterOverlaps(cycleInstrs);
2930  }
2931 
2932  LLVM_DEBUG(dump(););
2933 }
2934 
2935 void NodeSet::print(raw_ostream &os) const {
2936  os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
2937  << " depth " << MaxDepth << " col " << Colocate << "\n";
2938  for (const auto &I : Nodes)
2939  os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
2940  os << "\n";
2941 }
2942 
2943 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2944 /// Print the schedule information to the given output.
2946  // Iterate over each cycle.
2947  for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
2948  // Iterate over each instruction in the cycle.
2949  const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
2950  for (SUnit *CI : cycleInstrs->second) {
2951  os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
2952  os << "(" << CI->NodeNum << ") ";
2953  CI->getInstr()->print(os);
2954  os << "\n";
2955  }
2956  }
2957 }
2958 
2959 /// Utility function used for debugging to print the schedule.
2962 
2963 #endif
2964 
2966  const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
2967  unsigned ProcResourceID = 0;
2968 
2969  // We currently limit the resource kinds to 64 and below so that we can use
2970  // uint64_t for Masks
2971  assert(SM.getNumProcResourceKinds() < 64 &&
2972  "Too many kinds of resources, unsupported");
2973  // Create a unique bitmask for every processor resource unit.
2974  // Skip resource at index 0, since it always references 'InvalidUnit'.
2975  Masks.resize(SM.getNumProcResourceKinds());
2976  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2977  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2978  if (Desc.SubUnitsIdxBegin)
2979  continue;
2980  Masks[I] = 1ULL << ProcResourceID;
2981  ProcResourceID++;
2982  }
2983  // Create a unique bitmask for every processor resource group.
2984  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2985  const MCProcResourceDesc &Desc = *SM.getProcResource(I);
2986  if (!Desc.SubUnitsIdxBegin)
2987  continue;
2988  Masks[I] = 1ULL << ProcResourceID;
2989  for (unsigned U = 0; U < Desc.NumUnits; ++U)
2990  Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
2991  ProcResourceID++;
2992  }
2993  LLVM_DEBUG({
2994  if (SwpShowResMask) {
2995  dbgs() << "ProcResourceDesc:\n";
2996  for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
2997  const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
2998  dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
2999  ProcResource->Name, I, Masks[I],
3000  ProcResource->NumUnits);
3001  }
3002  dbgs() << " -----------------\n";
3003  }
3004  });
3005 }
3006 
3008 
3009  LLVM_DEBUG({
3010  if (SwpDebugResource)
3011  dbgs() << "canReserveResources:\n";
3012  });
3013  if (UseDFA)
3014  return DFAResources->canReserveResources(MID);
3015 
3016  unsigned InsnClass = MID->getSchedClass();
3017  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3018  if (!SCDesc->isValid()) {
3019  LLVM_DEBUG({
3020  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3021  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3022  });
3023  return true;
3024  }
3025 
3026  const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
3027  const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
3028  for (; I != E; ++I) {
3029  if (!I->Cycles)
3030  continue;
3031  const MCProcResourceDesc *ProcResource =
3032  SM.getProcResource(I->ProcResourceIdx);
3033  unsigned NumUnits = ProcResource->NumUnits;
3034  LLVM_DEBUG({
3035  if (SwpDebugResource)
3036  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3037  ProcResource->Name, I->ProcResourceIdx,
3038  ProcResourceCount[I->ProcResourceIdx], NumUnits,
3039  I->Cycles);
3040  });
3041  if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
3042  return false;
3043  }
3044  LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
3045  return true;
3046 }
3047 
3049  LLVM_DEBUG({
3050  if (SwpDebugResource)
3051  dbgs() << "reserveResources:\n";
3052  });
3053  if (UseDFA)
3054  return DFAResources->reserveResources(MID);
3055 
3056  unsigned InsnClass = MID->getSchedClass();
3057  const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
3058  if (!SCDesc->isValid()) {
3059  LLVM_DEBUG({
3060  dbgs() << "No valid Schedule Class Desc for schedClass!\n";
3061  dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
3062  });
3063  return;
3064  }
3065  for (const MCWriteProcResEntry &PRE :
3066  make_range(STI->getWriteProcResBegin(SCDesc),
3067  STI->getWriteProcResEnd(SCDesc))) {
3068  if (!PRE.Cycles)
3069  continue;
3070  ++ProcResourceCount[PRE.ProcResourceIdx];
3071  LLVM_DEBUG({
3072  if (SwpDebugResource) {
3073  const MCProcResourceDesc *ProcResource =
3074  SM.getProcResource(PRE.ProcResourceIdx);
3075  dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
3076  ProcResource->Name, PRE.ProcResourceIdx,
3077  ProcResourceCount[PRE.ProcResourceIdx],
3078  ProcResource->NumUnits, PRE.Cycles);
3079  }
3080  });
3081  }
3082  LLVM_DEBUG({
3083  if (SwpDebugResource)
3084  dbgs() << "reserveResources: done!\n\n";
3085  });
3086 }
3087 
3089  return canReserveResources(&MI.getDesc());
3090 }
3091 
3093  return reserveResources(&MI.getDesc());
3094 }
3095 
3097  if (UseDFA)
3098  return DFAResources->clearResources();
3099  std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
3100 }
i
i
Definition: README.txt:29
SDAG
QP Compare Ordered outs ins xscmpudp No SDAG
Definition: README_P9.txt:301
llvm::SwingSchedulerDAG
This class builds the dependence graph for the instructions in a loop, and attempts to schedule the i...
Definition: MachinePipeliner.h:106
computeLiveOuts
static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker, NodeSet &NS)
Compute the live-out registers for the instructions in a node-set.
Definition: MachinePipeliner.cpp:1606
llvm::cl::Option::getPosition
unsigned getPosition() const
Definition: CommandLine.h:302
llvm::ScheduleDAG::MRI
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:561
llvm::MCSubtargetInfo::getWriteProcResEnd
const MCWriteProcResEntry * getWriteProcResEnd(const MCSchedClassDesc *SC) const
Definition: MCSubtargetInfo.h:170
EnableSWP
static cl::opt< bool > EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Enable Software Pipelining"))
A command line option to turn software pipelining on or off.
isIntersect
static bool isIntersect(SmallSetVector< SUnit *, 8 > &Set1, const NodeSet &Set2, SmallSetVector< SUnit *, 8 > &Result)
Return true if Set1 contains elements in Set2.
Definition: MachinePipeliner.cpp:1818
llvm::ScheduleDAGTopologicalSort::RemovePred
void RemovePred(SUnit *M, SUnit *N)
Updates the topological ordering to accommodate an an edge to be removed from the specified node N fr...
Definition: ScheduleDAG.cpp:566
llvm::TargetInstrInfo::getMemOperandWithOffset
bool getMemOperandWithOffset(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const
Get the base operand and byte offset of an instruction that reads/writes memory.
Definition: TargetInstrInfo.cpp:1071
ScheduleDAG.h
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
MathExtras.h
llvm
Definition: AllocatorList.h:23
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::RegPressureTracker::addLiveRegs
void addLiveRegs(ArrayRef< RegisterMaskPair > Regs)
Force liveness of virtual registers or physical register units.
Definition: RegisterPressure.cpp:707
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition: PassAnalysisSupport.h:230
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:100
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::set_is_subset
bool set_is_subset(const S1Ty &S1, const S2Ty &S2)
set_is_subset(A, B) - Return true iff A in B
Definition: SetOperations.h:71
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:237
ExperimentalCodeGen
static cl::opt< bool > ExperimentalCodeGen("pipeliner-experimental-cg", cl::Hidden, cl::init(false), cl::desc("Use the experimental peeling code generator for software pipelining"))
llvm::MachinePipeliner::LoopInfo::FBB
MachineBasicBlock * FBB
Definition: MachinePipeliner.h:79
llvm::SwingSchedulerDAG::getHeight
unsigned getHeight(SUnit *Node)
The height, in the dependence graph, for a node.
Definition: MachinePipeliner.h:229
llvm::SwingSchedulerDAG::finishBlock
void finishBlock() override
Clean up after the software pipeliner runs.
Definition: MachinePipeliner.cpp:651
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
llvm::NodeSet::print
void print(raw_ostream &os) const
Definition: MachinePipeliner.cpp:2935
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
llvm::ModuloScheduleExpander::cleanup
void cleanup()
Performs final cleanup after expansion.
Definition: ModuloSchedule.cpp:181
llvm::SDep::Artificial
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
llvm::NodeSet::dump
LLVM_DUMP_METHOD void dump() const
Definition: MachinePipeliner.cpp:2961
llvm::ModuloSchedule
Represents a schedule for a single-block loop.
Definition: ModuloSchedule.h:78
SwpLoopLimit
static cl::opt< int > SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1))
MCInstrDesc.h
SetOperations.h
llvm::MachinePipeliner::MF
MachineFunction * MF
Definition: MachinePipeliner.h:62
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:162
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
llvm::ScheduleDAGInstrs::buildSchedGraph
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
Definition: ScheduleDAGInstrs.cpp:730
llvm::MachineInstr::memoperands_begin
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:697
llvm::lower_bound
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1615
llvm::MCInstrDesc::isPseudo
bool isPseudo() const
Return true if this is a pseudo instruction that doesn't correspond to a real machine instruction.
Definition: MCInstrDesc.h:265
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
it
Reference model for inliner Oz decision policy Note this model is also referenced by test Transforms Inline ML tests if replacing it
Definition: README.txt:3
llvm::MachinePipelinerID
char & MachinePipelinerID
This pass performs software pipelining on machine instructions.
Definition: MachinePipeliner.cpp:184
llvm::NodeSet::compareRecMII
int compareRecMII(NodeSet &RHS)
Definition: MachinePipeliner.h:371
llvm::MapVector::clear
void clear()
Definition: MapVector.h:88
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:130
llvm::ScheduleDAGTopologicalSort::IsReachable
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
Definition: ScheduleDAG.cpp:724
llvm::MCInstrDesc::getSchedClass
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:613
llvm::MachineMemOperand::getOffset
int64_t getOffset() const
For normal values, this is a byte offset added to the base address.
Definition: MachineMemOperand.h:216
getLoopPhiReg
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Definition: MachinePipeliner.cpp:678
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:626
MapVector.h
llvm::AAResults::isNoAlias
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
Definition: AliasAnalysis.h:510
llvm::SUnit::const_pred_iterator
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
llvm::MachineRegisterInfo::getUniqueVRegDef
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Definition: MachineRegisterInfo.cpp:411
llvm::SDep::Anti
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
llvm::SwingSchedulerDAG::schedule
void schedule() override
We override the schedule function in ScheduleDAGInstrs to implement the scheduling part of the Swing ...
Definition: MachinePipeliner.cpp:472
llvm::MachineRegisterInfo::defusechain_instr_iterator
defusechain_iterator - This class provides iterator support for machine operands in the function that...
Definition: MachineRegisterInfo.h:269
ValueTracking.h
llvm::MCProcResourceDesc::NumUnits
unsigned NumUnits
Definition: MCSchedule.h:34
llvm::IntervalPressure
RegisterPressure computed within a region of instructions delimited by TopIdx and BottomIdx.
Definition: RegisterPressure.h:68
MachineBasicBlock.h
llvm::SetVector::iterator
typename vector_type::const_iterator iterator
Definition: SetVector.h:48
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::DenseMapBase::erase
bool erase(const KeyT &Val)
Definition: DenseMap.h:302
llvm::MachineOperand::setImm
void setImm(int64_t immVal)
Definition: MachineOperand.h:652
llvm::MachineBasicBlock::findDebugLoc
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
Definition: MachineBasicBlock.cpp:1396
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
EnableSWPOptSize
static cl::opt< bool > EnableSWPOptSize("enable-pipeliner-opt-size", cl::desc("Enable SWP at Os."), cl::Hidden, cl::init(false))
A command line option to enable SWP at -Os.
llvm::SDep::Kind
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:312
llvm::DenseMapIterator
Definition: DenseMap.h:56
llvm::RegPressureTracker
Track the current register pressure at some position in the instruction stream, and remember the high...
Definition: RegisterPressure.h:358
llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:475
llvm::TargetInstrInfo::getIncrementValue
virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const
If the instruction is an increment of a constant value, return the amount.
Definition: TargetInstrInfo.h:1369
DenseMap.h
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::RegisterMaskPair
Definition: RegisterPressure.h:39
llvm::SwingSchedulerDAG::isLoopCarriedDep
bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc=true)
Return true for an order or output dependence that is loop carried potentially.
Definition: MachinePipeliner.cpp:2278
llvm::SmallSet
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:134
multipleIterations
static SUnit * multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG)
If an instruction has a use that spans multiple iterations, then return true.
Definition: MachinePipeliner.cpp:2465
llvm::SUnit::Succs
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
ignoreDependence
static bool ignoreDependence(const SDep &D, bool isPred)
Return true for DAG nodes that we ignore when computing the cost functions.
Definition: MachinePipeliner.cpp:1431
i1
Decimal Convert From to National Zoned Signed int_ppc_altivec_bcdcfno i1
Definition: README_P9.txt:147
llvm::MachineInstr::isCopy
bool isCopy() const
Definition: MachineInstr.h:1279
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
llvm::SwingSchedulerDAG::getZeroLatencyHeight
int getZeroLatencyHeight(SUnit *Node)
The maximum unweighted length of a path from the given node to an arbitrary node in which each edge h...
Definition: MachinePipeliner.h:233
llvm::MCWriteProcResEntry
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
llvm::NodeSet::empty
bool empty() const
Definition: MachinePipeliner.h:359
llvm::SMSchedule::insert
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II)
Try to schedule the node at the specified StartCycle and continue until the node is schedule or the E...
Definition: MachinePipeliner.cpp:2364
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::PeelingModuloScheduleExpander
A reimplementation of ModuloScheduleExpander.
Definition: ModuloSchedule.h:279
llvm::MachinePipeliner::LoopInfo::BrCond
SmallVector< MachineOperand, 4 > BrCond
Definition: MachinePipeliner.h:80
llvm::ModuloScheduleExpander::expand
void expand()
Performs the actual expansion.
Definition: ModuloSchedule.cpp:66
computePath
static bool computePath(SUnit *Cur, SetVector< SUnit * > &Path, SetVector< SUnit * > &DestNodes, SetVector< SUnit * > &Exclude, SmallPtrSet< SUnit *, 8 > &Visited)
Return true if there is a path from the specified node to any of the nodes in DestNodes.
Definition: MachinePipeliner.cpp:1579
llvm::MachineMemOperand::getAAInfo
AAMDNodes getAAInfo() const
Return the AA tags for the memory reference.
Definition: MachineMemOperand.h:243
SwpMaxMii
static cl::opt< int > SwpMaxMii("pipeliner-max-mii", cl::desc("Size limit for the MII."), cl::Hidden, cl::init(27))
A command line argument to limit minimum initial interval for pipelining.
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::cl::ReallyHidden
@ ReallyHidden
Definition: CommandLine.h:141
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
EmitTestAnnotations
static cl::opt< bool > EmitTestAnnotations("pipeliner-annotate-for-testing", cl::Hidden, cl::init(false), cl::desc("Instead of emitting the pipelined code, annotate instructions " "with the generated schedule for feeding into the " "-modulo-schedule-test pass"))
llvm::LoopBase::getNumBlocks
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
Definition: LoopInfo.h:185
llvm::InstrItineraryData::endStage
const InstrStage * endStage(unsigned ItinClassIndx) const
Return the last+1 stage of the itinerary.
Definition: MCInstrItineraries.h:141
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MCSchedClassDesc::isValid
bool isValid() const
Definition: MCSchedule.h:128
RegisterPressure.h
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::MachineInstr::hasOrderedMemoryRef
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
Definition: MachineInstr.cpp:1376
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:583
llvm::MCSchedModel::getNumProcResourceKinds
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:336
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
AliasAnalysis.h
llvm::SDep::isArtificial
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_END(MachinePipeliner
llvm::MachinePipeliner::LI
LoopInfo LI
Definition: MachinePipeliner.h:84
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::TargetInstrInfo::isZeroCost
bool isZeroCost(unsigned Opcode) const
Return true for pseudo instructions that don't consume any machine resources in their current form.
Definition: TargetInstrInfo.h:1577
llvm::MachineOptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:56
CommandLine.h
llvm::SUnit::isBoundaryNode
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
MCInstrItineraries.h
llvm::SUnit::removePred
void removePred(const SDep &D)
Removes the specified edge as a pred of the current node if it exists.
Definition: ScheduleDAG.cpp:175
llvm::NodeOrder
@ NodeOrder
Definition: SIMachineScheduler.h:35
llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1305
MachineLoopInfo.h
llvm::SetVector::begin
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:82
llvm::SMSchedule::dump
void dump() const
Utility function used for debugging to print the schedule.
Definition: MachinePipeliner.cpp:2960
llvm::AAResults
Definition: AliasAnalysis.h:456
llvm::SwingSchedulerDAG::getDistance
unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep)
The distance function, which indicates that operation V of iteration I depends on operations U of ite...
Definition: MachinePipeliner.h:250
llvm::SwingSchedulerDAG::isBackedge
bool isBackedge(SUnit *Source, const SDep &Dep)
Return true if the dependence is a back-edge in the data dependence graph.
Definition: MachinePipeliner.h:240
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:534
llvm::ScheduleDAGInstrs::UnknownValue
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
Definition: ScheduleDAGInstrs.h:236
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:200
llvm::SUnit::Latency
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
llvm::MachineOperand::isUse
bool isUse() const
Definition: MachineOperand.h:367
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SUnit::NodeNum
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
llvm::MachineOperand::setSubReg
void setSubReg(unsigned subReg)
Definition: MachineOperand.h:468
llvm::SwingSchedulerDAG::getMOV
int getMOV(SUnit *Node)
The mobility function, which the number of slots in which an instruction may be scheduled.
Definition: MachinePipeliner.h:217
llvm::MCSchedClassDesc
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:110
llvm::MachineFunction::DeleteMachineInstr
void DeleteMachineInstr(MachineInstr *MI)
DeleteMachineInstr - Delete the given MachineInstr.
Definition: MachineFunction.cpp:393
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::LaneBitmask::getNone
static constexpr LaneBitmask getNone()
Definition: LaneBitmask.h:82
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::SMSchedule::getFinalCycle
int getFinalCycle() const
Return the last cycle in the finalized schedule.
Definition: MachinePipeliner.h:537
DFAPacketizer.h
IP
Definition: NVPTXLowerArgs.cpp:166
llvm::SMSchedule::getFirstCycle
int getFirstCycle() const
Return the first cycle in the completed schedule.
Definition: MachinePipeliner.h:534
llvm::ScheduleDAGInstrs::getSUnit
SUnit * getSUnit(MachineInstr *MI) const
Returns an existing SUnit for this MI, or nullptr.
Definition: ScheduleDAGInstrs.h:390
false
Definition: StackSlotColoring.cpp:142
TargetOpcodes.h
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::SUnit::NumPreds
unsigned NumPreds
Definition: ScheduleDAG.h:266
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::LoopBase::getBlocks
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:171
llvm::SUnit::setInstr
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
llvm::ScheduleDAGTopologicalSort::AddPred
void AddPred(SUnit *Y, SUnit *X)
Updates the topological ordering to accommodate an edge to be added from SUnit X to SUnit Y.
Definition: ScheduleDAG.cpp:548
llvm::SMSchedule::setInitiationInterval
void setInitiationInterval(int ii)
Set the initiation interval for this schedule.
Definition: MachinePipeliner.h:527
llvm::Instruction
Definition: Instruction.h:45
llvm::SMSchedule::isValidSchedule
bool isValidSchedule(SwingSchedulerDAG *SSD)
Definition: MachinePipeliner.cpp:2720
SwpPruneDeps
static cl::opt< bool > SwpPruneDeps("pipeliner-prune-deps", cl::desc("Prune dependences between unrelated Phi nodes."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of chain dependences due to an unrelated Phi.
llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition: MachineOptimizationRemarkEmitter.h:108
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::SMSchedule::orderDependence
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, std::deque< SUnit * > &Insts)
Order the instructions within a cycle so that the definitions occur before the uses.
Definition: MachinePipeliner.cpp:2537
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
BitVector.h
llvm::SDep::Output
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
llvm::SlotIndexes
SlotIndexes pass.
Definition: SlotIndexes.h:314
isDependenceBarrier
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA)
Return true if the instruction causes a chain between memory references before and after it.
Definition: MachinePipeliner.cpp:709
llvm::SDep::Data
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
llvm::SMSchedule::reset
void reset()
Definition: MachinePipeliner.h:518
DebugLoc.h
llvm::RegPressureDelta::Excess
PressureChange Excess
Definition: RegisterPressure.h:240
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::ScheduleDAGTopologicalSort::const_reverse_iterator
std::vector< int >::const_reverse_iterator const_reverse_iterator
Definition: ScheduleDAG.h:778
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:311
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:72
llvm::NodeSet::setColocate
void setColocate(unsigned c)
Definition: MachinePipeliner.h:365
llvm::SMSchedule::isLoopCarried
bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi)
Return true if the scheduled Phi has a loop carried operand.
Definition: MachinePipeliner.cpp:2666
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:277
DEBUG_TYPE
#define DEBUG_TYPE
Definition: MachinePipeliner.cpp:96
llvm::MachinePipeliner
The main class in the implementation of the target independent software pipeliner pass.
Definition: MachinePipeliner.h:60
llvm::SlotIndexes::insertMachineInstrInMaps
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:535
llvm::SDep::Order
@ Order
Any other ordering dependency.
Definition: ScheduleDAG.h:56
llvm::PriorityQueue
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
Definition: PriorityQueue.h:27
llvm::InstrStage::FuncUnits
uint64_t FuncUnits
Bitmask representing a set of functional units.
Definition: MCInstrItineraries.h:65
llvm::TargetInstrInfo::analyzeLoopForPipelining
virtual std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
Definition: TargetInstrInfo.h:739
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::ResourceManager::canReserveResources
bool canReserveResources(const MCInstrDesc *MID) const
Check if the resources occupied by a MCInstrDesc are available in the current state.
Definition: MachinePipeliner.cpp:3007
llvm::TargetInstrInfo::getBaseAndOffsetPosition
virtual bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const
Return true if the instruction contains a base register and offset.
Definition: TargetInstrInfo.h:1341
llvm::cl::ZeroOrMore
@ ZeroOrMore
Definition: CommandLine.h:117
llvm::MDNode::getOperand
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1102
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::SMSchedule::computeStart
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG)
Compute the scheduling start slot for the instruction.
Definition: MachinePipeliner.cpp:2476
llvm::cl::opt< bool >
llvm::SUnit::isPred
bool isPred(const SUnit *N) const
Tests if node N is a predecessor of this node.
Definition: ScheduleDAG.h:431
llvm::getRegState
unsigned getRegState(const MachineOperand &RegOp)
Get all register state flags from machine operand RegOp.
Definition: MachineInstrBuilder.h:529
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::SetVector::contains
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:209
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:568
llvm::getUnderlyingObjects
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
Definition: ValueTracking.cpp:4341
llvm::MCProcResourceDesc
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
llvm::MachinePipeliner::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachinePipeliner.cpp:446
llvm::MapVector::find
iterator find(const KeyT &Key)
Definition: MapVector.h:147
llvm::MachineOptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition: MachineOptimizationRemarkEmitter.cpp:49
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1588
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::ModuloScheduleTestAnnotater::annotate
void annotate()
Performs the annotation.
Definition: ModuloSchedule.cpp:2187
LiveIntervals.h
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::SMSchedule::getInitiationInterval
int getInitiationInterval() const
Return the initiation interval for this schedule.
Definition: MachinePipeliner.h:530
llvm::MachineOptimizationRemarkMissed
Diagnostic information for missed-optimization remarks.
Definition: MachineOptimizationRemarkEmitter.h:82
llvm::SmallPtrSetImpl::end
iterator end() const
Definition: SmallPtrSet.h:407
llvm::MachineFunction::CloneMachineInstr
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
Definition: MachineFunction.cpp:358
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::MachineRegisterInfo::use_instr_begin
use_instr_iterator use_instr_begin(Register RegNo) const
Definition: MachineRegisterInfo.h:477
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
s
multiplies can be turned into SHL s
Definition: README.txt:370
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::NodeSet::getRecMII
int getRecMII()
Definition: MachinePipeliner.h:373
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
MemoryLocation.h
llvm::DenseMap
Definition: DenseMap.h:714
llvm::codeview::FrameCookieKind::Copy
@ Copy
llvm::PressureChange::isValid
bool isValid() const
Definition: RegisterPressure.h:113
llvm::MachinePipeliner::ORE
MachineOptimizationRemarkEmitter * ORE
Definition: MachinePipeliner.h:63
I
#define I(x, y, z)
Definition: MD5.cpp:59
PriorityQueue.h
llvm::SUnit::getInstr
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
llvm::SwingSchedulerDAG::getALAP
int getALAP(SUnit *Node)
Return the latest time an instruction my be scheduled.
Definition: MachinePipeliner.h:213
llvm::NodeSet::size
unsigned size() const
Definition: MachinePipeliner.h:357
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
MCRegisterInfo.h
llvm::MachineBasicBlock::getFirstNonPHI
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: MachineBasicBlock.cpp:199
llvm::LoopBase::getLoopPreheader
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:167
llvm::PressureChange::getUnitInc
int getUnitInc() const
Definition: RegisterPressure.h:125
llvm::SmallPtrSetImpl::begin
iterator begin() const
Definition: SmallPtrSet.h:402
Pipelining
Modulo Software Pipelining
Definition: MachinePipeliner.cpp:193
ArrayRef.h
MachineFunctionPass.h
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::pdb::PDB_MemoryType::Stack
@ Stack
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineRegisterInfo::isAllocatable
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
Definition: MachineRegisterInfo.h:918
llvm::TargetRegisterInfo::getRegPressureSetName
virtual const char * getRegPressureSetName(unsigned Idx) const =0
Get the name of this register unit pressure set.
llvm::MachinePipeliner::LoopInfo::LoopCompare
MachineInstr * LoopCompare
Definition: MachinePipeliner.h:82
llvm::RegisterPressure::MaxSetPressure
std::vector< unsigned > MaxSetPressure
Map of max reg pressure indexed by pressure set ID, not class ID.
Definition: RegisterPressure.h:50
getPhiRegs
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, unsigned &InitVal, unsigned &LoopVal)
Return the register values for the operands of a Phi instruction.
Definition: MachinePipeliner.cpp:662
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:239
llvm::MachinePipeliner::LoopInfo::LoopInductionVar
MachineInstr * LoopInductionVar
Definition: MachinePipeliner.h:81
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:958
llvm::SMSchedule::isLoopCarriedDefOfUse
bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, MachineOperand &MO)
Return true if the instruction is a definition that is loop carried and defines the use on the next i...
Definition: MachinePipeliner.cpp:2694
isSuccOrder
static bool isSuccOrder(SUnit *SUa, SUnit *SUb)
Return true if SUb can be reached from SUa following the chain edges.
Definition: MachinePipeliner.cpp:686
llvm::NodeSet::setRecMII
void setRecMII(unsigned mii)
Definition: MachinePipeliner.h:363
iterator_range.h
llvm::MachineInstr::isPHI
bool isPHI() const
Definition: MachineInstr.h:1243
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::PressureChange::getPSet
unsigned getPSet() const
Definition: RegisterPressure.h:115
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::SUnit::const_succ_iterator
SmallVectorImpl< SDep >::const_iterator const_succ_iterator
Definition: ScheduleDAG.h:262
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::SMSchedule::getInstructions
std::deque< SUnit * > & getInstructions(int cycle)
Return the instructions that are scheduled at the specified cycle.
Definition: MachinePipeliner.h:584
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::MachineInstr::dump
void dump() const
Definition: MachineInstr.cpp:1540
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1486
ScheduleDAGMutation.h
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::ScheduleDAG::TRI
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:559
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::SwingSchedulerDAG::getZeroLatencyDepth
int getZeroLatencyDepth(SUnit *Node)
The maximum unweighted length of a path from an arbitrary node to the given node in which each edge h...
Definition: MachinePipeliner.h:224
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:98
llvm::MCSubtargetInfo::getWriteProcResBegin
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class.
Definition: MCSubtargetInfo.h:166
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::ModuloScheduleTestAnnotater
Expander that simply annotates each scheduled instruction with a post-instr symbol that can be consum...
Definition: ModuloSchedule.h:375
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
SwpPruneLoopCarried
static cl::opt< bool > SwpPruneLoopCarried("pipeliner-prune-loop-carried", cl::desc("Prune loop carried order dependences."), cl::Hidden, cl::init(true))
A command line option to disable the pruning of loop carried order dependences.
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:549
llvm::SDep::getSUnit
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::MachinePipeliner::RegClassInfo
RegisterClassInfo RegClassInfo
Definition: MachinePipeliner.h:68
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
llvm::MachinePipeliner::ID
static char ID
Definition: MachinePipeliner.h:86
llvm::SwpEnableCopyToPhi
cl::opt< bool > SwpEnableCopyToPhi
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:560
ModuloSchedule.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::PeelingModuloScheduleExpander::expand
void expand()
Definition: ModuloSchedule.cpp:1988
llvm::MachineOptimizationRemarkEmitterPass
The analysis pass.
Definition: MachineOptimizationRemarkEmitter.h:212
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
Compiler.h
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1690
llvm::SDep::Barrier
@ Barrier
An unknown scheduling barrier.
Definition: ScheduleDAG.h:69
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MCProcResourceDesc::SubUnitsIdxBegin
const unsigned * SubUnitsIdxBegin
Definition: MCSchedule.h:54
llvm::MachineInstr::print
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
Definition: MachineInstr.cpp:1577
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:372
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::MCProcResourceDesc::Name
const char * Name
Definition: MCSchedule.h:33
llvm::ScheduleDAGInstrs::dump
void dump() const override
Definition: ScheduleDAGInstrs.cpp:1171
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::NodeSet::count
unsigned count(SUnit *SU) const
Definition: MachinePipeliner.h:353
llvm::SMSchedule::finalizeSchedule
void finalizeSchedule(SwingSchedulerDAG *SSD)
After the schedule has been formed, call this function to combine the instructions from the different...
Definition: MachinePipeliner.cpp:2886
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:148
SwpShowResMask
static cl::opt< bool > SwpShowResMask("pipeliner-show-mask", cl::Hidden, cl::init(false))
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ScheduleDAGInstrs::MISUnitMap
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
Definition: ScheduleDAGInstrs.h:158
llvm::ResourceManager::reserveResources
void reserveResources(const MCInstrDesc *MID)
Reserve the resources occupied by a MCInstrDesc and change the current state to reflect that change.
Definition: MachinePipeliner.cpp:3048
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineOperand::getSubReg
unsigned getSubReg() const
Definition: MachineOperand.h:362
SwpDebugResource
static cl::opt< bool > SwpDebugResource("pipeliner-dbg-res", cl::Hidden, cl::init(false))
llvm::MapVector::end
iterator end()
Definition: MapVector.h:71
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::InstrStage
These values represent a non-pipelined step in the execution of an instruction.
Definition: MCInstrItineraries.h:58
llvm::SDep
Scheduling dependency.
Definition: ScheduleDAG.h:49
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::clear
void clear()
Completely clear the SetVector.
Definition: SetVector.h:220
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
llvm::RegPressureDelta
Store the effects of a change in pressure on things that MI scheduler cares about.
Definition: RegisterPressure.h:239
llvm::MachineInstr::isRegSequence
bool isRegSequence() const
Definition: MachineInstr.h:1271
j
return j(j<< 16)
llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:562
llvm::stable_sort
void stable_sort(R &&Range)
Definition: STLExtras.h:1640
llvm::isIdentifiedObject
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Definition: AliasAnalysis.cpp:973
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::TargetInstrInfo::isPostIncrement
virtual bool isPostIncrement(const MachineInstr &MI) const
Return true for post-incremented instructions.
Definition: TargetInstrInfo.h:1411
llvm::ResourceManager::initProcResourceVectors
void initProcResourceVectors(const MCSchedModel &SM, SmallVectorImpl< uint64_t > &Masks)
Definition: MachinePipeliner.cpp:2965
llvm::SwingSchedulerDAG::getDepth
unsigned getDepth(SUnit *Node)
The depth, in the dependence graph, for a node.
Definition: MachinePipeliner.h:220
swapAntiDependences
static void swapAntiDependences(std::vector< SUnit > &SUnits)
Swap all the anti dependences in the DAG.
Definition: MachinePipeliner.cpp:1194
SwpMaxStages
static cl::opt< int > SwpMaxStages("pipeliner-max-stages", cl::desc("Maximum stages allowed in the generated scheduled."), cl::Hidden, cl::init(3))
A command line argument to limit the number of stages in the pipeline.
llvm::Successor
@ Successor
Definition: SIMachineScheduler.h:33
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::NodeSet::clear
void clear()
Definition: MachinePipeliner.h:387
llvm::MachineInstr::mayStore
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
Definition: MachineInstr.h:1007
llvm::TargetInstrInfo::analyzeBranch
virtual bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Definition: TargetInstrInfo.h:612
llvm::MachinePipeliner::TII
const TargetInstrInfo * TII
Definition: MachinePipeliner.h:67
llvm::SDep::setLatency
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
llvm::MCSchedModel::getProcResource
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:340
llvm::MachineInstr::hasUnmodeledSideEffects
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
Definition: MachineInstr.cpp:1457
llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:558
llvm::MachineLoop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: MachineLoopInfo.cpp:101
Function.h
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1446
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::ResourceManager::clearResources
void clearResources()
Reset the state.
Definition: MachinePipeliner.cpp:3096
llvm::ScheduleDAGTopologicalSort::InitDAGTopologicalSorting
void InitDAGTopologicalSorting()
Creates the initial topological ordering from the DAG to be scheduled.
Definition: ScheduleDAG.cpp:438
llvm::SUnit::addPred
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
Definition: ScheduleDAG.cpp:107
llvm::TargetInstrInfo::areMemAccessesTriviallyDisjoint
virtual bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const
Sometimes, it is possible for the target to tell, even without aliasing information,...
Definition: TargetInstrInfo.h:1779
llvm::MCSchedModel
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:245
llvm::MemoryLocation::UnknownSize
@ UnknownSize
Definition: MemoryLocation.h:214
llvm::SUnit::hasPhysRegDefs
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
llvm::LiveIntervals
Definition: LiveIntervals.h:54
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
pred_L
static bool pred_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Preds, const NodeSet *S=nullptr)
Compute the Pred_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1521
llvm::SMSchedule::print
void print(raw_ostream &os) const
Print the schedule information to the given output.
Definition: MachinePipeliner.cpp:2945
llvm::SwingSchedulerDAG::applyInstrChange
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule)
Apply changes to the instruction if needed.
Definition: MachinePipeliner.cpp:2223
llvm::SDep::getKind
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
llvm::ScheduleDAGInstrs::BB
MachineBasicBlock * BB
The block in which to insert instructions.
Definition: ScheduleDAGInstrs.h:145
llvm::MachinePipeliner::disabledByPragma
bool disabledByPragma
Definition: MachinePipeliner.h:69
llvm::SMSchedule::earliestCycleInChain
int earliestCycleInChain(const SDep &Dep)
Return the cycle of the earliest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2417
llvm::AttributeList::FunctionIndex
@ FunctionIndex
Definition: Attributes.h:379
llvm::MCRegUnitIterator
Definition: MCRegisterInfo.h:677
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:187
llvm::SetVector::end
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:92
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::MachinePipeliner::NumTries
static int NumTries
Definition: MachinePipeliner.h:73
llvm::ScheduleDAGInstrs::dumpNode
void dumpNode(const SUnit &SU) const override
Definition: ScheduleDAGInstrs.cpp:1163
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::rdf::NodeSet
std::set< NodeId > NodeSet
Definition: RDFGraph.h:513
MachineInstrBuilder.h
llvm::ScheduleDAGInstrs::finishBlock
virtual void finishBlock()
Cleans up after scheduling in the given block.
Definition: ScheduleDAGInstrs.cpp:182
succ_L
static bool succ_L(SetVector< SUnit * > &NodeOrder, SmallSetVector< SUnit *, 8 > &Succs, const NodeSet *S=nullptr)
Compute the Succ_L(O) set, as defined in the paper.
Definition: MachinePipeliner.cpp:1551
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
llvm::MemoryLocation::getAfter
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
Definition: MemoryLocation.h:267
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:562
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
N
#define N
llvm::MCRegisterInfo::DiffListIterator::isValid
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Definition: MCRegisterInfo.h:224
llvm::MachineOperand::setReg
void setReg(Register Reg)
Change the register this operand corresponds to.
Definition: MachineOperand.cpp:55
DefMI
MachineInstrBuilder MachineInstrBuilder & DefMI
Definition: AArch64ExpandPseudoInsts.cpp:101
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1269
llvm::MachineInstr::getNumOperands
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:481
llvm::MachineLoop::getTopBlock
MachineBasicBlock * getTopBlock()
Return the "top" block in the loop, which is the first block in the linear layout,...
Definition: MachineLoopInfo.cpp:61
llvm::MachinePipeliner::LoopInfo::TBB
MachineBasicBlock * TBB
Definition: MachinePipeliner.h:78
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
LaneBitmask.h
llvm::MCSchedModel::getSchedClassDesc
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:347
llvm::NodeSet::getNode
SUnit * getNode(unsigned i) const
Definition: MachinePipeliner.h:361
MachineMemOperand.h
llvm::SmallVectorImpl< const Value * >
MachineOperand.h
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:345
pred
hexagon gen pred
Definition: HexagonGenPredicate.cpp:134
llvm::SUnit::Preds
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
llvm::MachinePipeliner::II_setByPragma
unsigned II_setByPragma
Definition: MachinePipeliner.h:70
llvm::ModuloScheduleExpander
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and...
Definition: ModuloSchedule.h:159
llvm::MCInstrInfo::get
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:62
llvm::SUnit::pred_iterator
SmallVectorImpl< SDep >::iterator pred_iterator
Definition: ScheduleDAG.h:259
MachinePipeliner.h
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::InstrItineraryData::beginStage
const InstrStage * beginStage(unsigned ItinClassIndx) const
Return the first stage of the itinerary.
Definition: MCInstrItineraries.h:135
llvm::SMSchedule::getMaxStageCount
unsigned getMaxStageCount()
Return the maximum stage count needed for this schedule.
Definition: MachinePipeliner.h:579
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
SwpIgnoreRecMII
static cl::opt< bool > SwpIgnoreRecMII("pipeliner-ignore-recmii", cl::ReallyHidden, cl::init(false), cl::ZeroOrMore, cl::desc("Ignore RecMII"))
llvm::MCSchedModel::hasInstrSchedModel
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:321
llvm::cl::desc
Definition: CommandLine.h:411
llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:119
raw_ostream.h
llvm::SMSchedule::latestCycleInChain
int latestCycleInChain(const SDep &Dep)
Return the cycle of the latest scheduled instruction in the dependence chain.
Definition: MachinePipeliner.cpp:2440
llvm::SMSchedule::cycleScheduled
unsigned cycleScheduled(SUnit *SU) const
Return the cycle for a scheduled instruction.
Definition: MachinePipeliner.h:572
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
MachineFunction.h
llvm::MachineInstr::mayRaiseFPException
bool mayRaiseFPException() const
Return true if this instruction could possibly raise a floating-point exception.
Definition: MachineInstr.h:1027
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::InstrItineraryData::isEmpty
bool isEmpty() const
Returns true if there are no itineraries.
Definition: MCInstrItineraries.h:126
llvm::SwingSchedulerDAG::fixupRegisterOverlaps
void fixupRegisterOverlaps(std::deque< SUnit * > &Instrs)
Attempt to fix the degenerate cases when the instruction serialization causes the register lifetimes ...
Definition: MachinePipeliner.cpp:2837
llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition: MCSubtargetInfo.h:75
llvm::ResourceManager
Definition: MachinePipeliner.h:434
llvm::MachineInstr::operands
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:607
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineRegisterInfo::use_instr_end
static use_instr_iterator use_instr_end()
Definition: MachineRegisterInfo.h:480
llvm::SwingSchedulerDAG::getASAP
int getASAP(SUnit *Node)
Return the earliest time an instruction may be scheduled.
Definition: MachinePipeliner.h:210
TargetRegisterInfo.h
Debug.h
llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition: MCInstrItineraries.h:109
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
llvm::SDep::getLatency
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Definition: ScheduleDAG.h:142
llvm::SMSchedule::stageScheduled
int stageScheduled(SUnit *SU) const
Return the stage for a scheduled instruction.
Definition: MachinePipeliner.h:563
SetVector.h
llvm::SMSchedule
This class represents the scheduled code.
Definition: MachinePipeliner.h:488
llvm::MachineOperand::isIdenticalTo
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
Definition: MachineOperand.cpp:282
MachineDominators.h
llvm::SwingSchedulerDAG::getInstrBaseReg
unsigned getInstrBaseReg(SUnit *SU)
Return the new base register that was stored away for the changed instruction.
Definition: MachinePipeliner.h:264
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364